1/* 2 * Copyright 2011 Joakim Sindholt <opensource@zhasha.com> 3 * Copyright 2013 Christoph Bumiller 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 23 24#include "nine_shader.h" 25 26#include "device9.h" 27#include "nine_debug.h" 28#include "nine_state.h" 29#include "vertexdeclaration9.h" 30 31#include "util/macros.h" 32#include "util/u_memory.h" 33#include "util/u_inlines.h" 34#include "pipe/p_shader_tokens.h" 35#include "tgsi/tgsi_ureg.h" 36#include "tgsi/tgsi_dump.h" 37#include "nir/tgsi_to_nir.h" 38 39#define DBG_CHANNEL DBG_SHADER 40 41#define DUMP(args...) _nine_debug_printf(DBG_CHANNEL, NULL, args) 42 43 44struct shader_translator; 45 46typedef HRESULT (*translate_instruction_func)(struct shader_translator *); 47 48static inline const char *d3dsio_to_string(unsigned opcode); 49 50 51#define NINED3D_SM1_VS 0xfffe 52#define NINED3D_SM1_PS 0xffff 53 54#define NINE_MAX_COND_DEPTH 64 55#define NINE_MAX_LOOP_DEPTH 64 56 57#define NINED3DSP_END 0x0000ffff 58 59#define NINED3DSPTYPE_FLOAT4 0 60#define NINED3DSPTYPE_INT4 1 61#define NINED3DSPTYPE_BOOL 2 62 63#define NINED3DSPR_IMMEDIATE (D3DSPR_PREDICATE + 1) 64 65#define NINED3DSP_WRITEMASK_MASK D3DSP_WRITEMASK_ALL 66#define NINED3DSP_WRITEMASK_SHIFT 16 67 68#define NINED3DSHADER_INST_PREDICATED (1 << 28) 69 70#define NINED3DSHADER_REL_OP_GT 1 71#define NINED3DSHADER_REL_OP_EQ 2 72#define NINED3DSHADER_REL_OP_GE 3 73#define NINED3DSHADER_REL_OP_LT 4 74#define NINED3DSHADER_REL_OP_NE 5 75#define NINED3DSHADER_REL_OP_LE 6 76 77#define NINED3DSIO_OPCODE_FLAGS_SHIFT 16 78#define NINED3DSIO_OPCODE_FLAGS_MASK (0xff << NINED3DSIO_OPCODE_FLAGS_SHIFT) 79 80#define NINED3DSI_TEXLD_PROJECT 0x1 81#define NINED3DSI_TEXLD_BIAS 0x2 82 83#define NINED3DSP_WRITEMASK_0 0x1 84#define NINED3DSP_WRITEMASK_1 0x2 85#define NINED3DSP_WRITEMASK_2 0x4 86#define NINED3DSP_WRITEMASK_3 0x8 87#define NINED3DSP_WRITEMASK_ALL 0xf 88 89#define NINED3DSP_NOSWIZZLE ((0 << 0) | (1 << 2) | (2 << 4) | (3 << 6)) 90 91#define NINE_SWIZZLE4(x,y,z,w) \ 92 TGSI_SWIZZLE_##x, TGSI_SWIZZLE_##y, TGSI_SWIZZLE_##z, TGSI_SWIZZLE_##w 93 94#define NINE_APPLY_SWIZZLE(src, s) \ 95 ureg_swizzle(src, NINE_SWIZZLE4(s, s, s, s)) 96 97#define NINED3DSPDM_SATURATE (D3DSPDM_SATURATE >> D3DSP_DSTMOD_SHIFT) 98#define NINED3DSPDM_PARTIALP (D3DSPDM_PARTIALPRECISION >> D3DSP_DSTMOD_SHIFT) 99#define NINED3DSPDM_CENTROID (D3DSPDM_MSAMPCENTROID >> D3DSP_DSTMOD_SHIFT) 100 101/* 102 * NEG all, not ps: m3x2, m3x3, m3x4, m4x3, m4x4 103 * BIAS <= PS 1.4 (x-0.5) 104 * BIASNEG <= PS 1.4 (-(x-0.5)) 105 * SIGN <= PS 1.4 (2(x-0.5)) 106 * SIGNNEG <= PS 1.4 (-2(x-0.5)) 107 * COMP <= PS 1.4 (1-x) 108 * X2 = PS 1.4 (2x) 109 * X2NEG = PS 1.4 (-2x) 110 * DZ <= PS 1.4, tex{ld,crd} (.xy/.z), z=0 => .11 111 * DW <= PS 1.4, tex{ld,crd} (.xy/.w), w=0 => .11 112 * ABS >= SM 3.0 (abs(x)) 113 * ABSNEG >= SM 3.0 (-abs(x)) 114 * NOT >= SM 2.0 pedication only 115 */ 116#define NINED3DSPSM_NONE (D3DSPSM_NONE >> D3DSP_SRCMOD_SHIFT) 117#define NINED3DSPSM_NEG (D3DSPSM_NEG >> D3DSP_SRCMOD_SHIFT) 118#define NINED3DSPSM_BIAS (D3DSPSM_BIAS >> D3DSP_SRCMOD_SHIFT) 119#define NINED3DSPSM_BIASNEG (D3DSPSM_BIASNEG >> D3DSP_SRCMOD_SHIFT) 120#define NINED3DSPSM_SIGN (D3DSPSM_SIGN >> D3DSP_SRCMOD_SHIFT) 121#define NINED3DSPSM_SIGNNEG (D3DSPSM_SIGNNEG >> D3DSP_SRCMOD_SHIFT) 122#define NINED3DSPSM_COMP (D3DSPSM_COMP >> D3DSP_SRCMOD_SHIFT) 123#define NINED3DSPSM_X2 (D3DSPSM_X2 >> D3DSP_SRCMOD_SHIFT) 124#define NINED3DSPSM_X2NEG (D3DSPSM_X2NEG >> D3DSP_SRCMOD_SHIFT) 125#define NINED3DSPSM_DZ (D3DSPSM_DZ >> D3DSP_SRCMOD_SHIFT) 126#define NINED3DSPSM_DW (D3DSPSM_DW >> D3DSP_SRCMOD_SHIFT) 127#define NINED3DSPSM_ABS (D3DSPSM_ABS >> D3DSP_SRCMOD_SHIFT) 128#define NINED3DSPSM_ABSNEG (D3DSPSM_ABSNEG >> D3DSP_SRCMOD_SHIFT) 129#define NINED3DSPSM_NOT (D3DSPSM_NOT >> D3DSP_SRCMOD_SHIFT) 130 131static const char *sm1_mod_str[] = 132{ 133 [NINED3DSPSM_NONE] = "", 134 [NINED3DSPSM_NEG] = "-", 135 [NINED3DSPSM_BIAS] = "bias", 136 [NINED3DSPSM_BIASNEG] = "biasneg", 137 [NINED3DSPSM_SIGN] = "sign", 138 [NINED3DSPSM_SIGNNEG] = "signneg", 139 [NINED3DSPSM_COMP] = "comp", 140 [NINED3DSPSM_X2] = "x2", 141 [NINED3DSPSM_X2NEG] = "x2neg", 142 [NINED3DSPSM_DZ] = "dz", 143 [NINED3DSPSM_DW] = "dw", 144 [NINED3DSPSM_ABS] = "abs", 145 [NINED3DSPSM_ABSNEG] = "-abs", 146 [NINED3DSPSM_NOT] = "not" 147}; 148 149static void 150sm1_dump_writemask(BYTE mask) 151{ 152 if (mask & 1) DUMP("x"); else DUMP("_"); 153 if (mask & 2) DUMP("y"); else DUMP("_"); 154 if (mask & 4) DUMP("z"); else DUMP("_"); 155 if (mask & 8) DUMP("w"); else DUMP("_"); 156} 157 158static void 159sm1_dump_swizzle(BYTE s) 160{ 161 char c[4] = { 'x', 'y', 'z', 'w' }; 162 DUMP("%c%c%c%c", 163 c[(s >> 0) & 3], c[(s >> 2) & 3], c[(s >> 4) & 3], c[(s >> 6) & 3]); 164} 165 166static const char sm1_file_char[] = 167{ 168 [D3DSPR_TEMP] = 'r', 169 [D3DSPR_INPUT] = 'v', 170 [D3DSPR_CONST] = 'c', 171 [D3DSPR_ADDR] = 'A', 172 [D3DSPR_RASTOUT] = 'R', 173 [D3DSPR_ATTROUT] = 'D', 174 [D3DSPR_OUTPUT] = 'o', 175 [D3DSPR_CONSTINT] = 'I', 176 [D3DSPR_COLOROUT] = 'C', 177 [D3DSPR_DEPTHOUT] = 'D', 178 [D3DSPR_SAMPLER] = 's', 179 [D3DSPR_CONST2] = 'c', 180 [D3DSPR_CONST3] = 'c', 181 [D3DSPR_CONST4] = 'c', 182 [D3DSPR_CONSTBOOL] = 'B', 183 [D3DSPR_LOOP] = 'L', 184 [D3DSPR_TEMPFLOAT16] = 'h', 185 [D3DSPR_MISCTYPE] = 'M', 186 [D3DSPR_LABEL] = 'X', 187 [D3DSPR_PREDICATE] = 'p' 188}; 189 190static void 191sm1_dump_reg(BYTE file, INT index) 192{ 193 switch (file) { 194 case D3DSPR_LOOP: 195 DUMP("aL"); 196 break; 197 case D3DSPR_COLOROUT: 198 DUMP("oC%i", index); 199 break; 200 case D3DSPR_DEPTHOUT: 201 DUMP("oDepth"); 202 break; 203 case D3DSPR_RASTOUT: 204 DUMP("oRast%i", index); 205 break; 206 case D3DSPR_CONSTINT: 207 DUMP("iconst[%i]", index); 208 break; 209 case D3DSPR_CONSTBOOL: 210 DUMP("bconst[%i]", index); 211 break; 212 default: 213 DUMP("%c%i", sm1_file_char[file], index); 214 break; 215 } 216} 217 218struct sm1_src_param 219{ 220 INT idx; 221 struct sm1_src_param *rel; 222 BYTE file; 223 BYTE swizzle; 224 BYTE mod; 225 BYTE type; 226 union { 227 DWORD d[4]; 228 float f[4]; 229 int i[4]; 230 BOOL b; 231 } imm; 232}; 233static void 234sm1_parse_immediate(struct shader_translator *, struct sm1_src_param *); 235 236struct sm1_dst_param 237{ 238 INT idx; 239 struct sm1_src_param *rel; 240 BYTE file; 241 BYTE mask; 242 BYTE mod; 243 int8_t shift; /* sint4 */ 244 BYTE type; 245}; 246 247static inline void 248assert_replicate_swizzle(const struct ureg_src *reg) 249{ 250 assert(reg->SwizzleY == reg->SwizzleX && 251 reg->SwizzleZ == reg->SwizzleX && 252 reg->SwizzleW == reg->SwizzleX); 253} 254 255static void 256sm1_dump_immediate(const struct sm1_src_param *param) 257{ 258 switch (param->type) { 259 case NINED3DSPTYPE_FLOAT4: 260 DUMP("{ %f %f %f %f }", 261 param->imm.f[0], param->imm.f[1], 262 param->imm.f[2], param->imm.f[3]); 263 break; 264 case NINED3DSPTYPE_INT4: 265 DUMP("{ %i %i %i %i }", 266 param->imm.i[0], param->imm.i[1], 267 param->imm.i[2], param->imm.i[3]); 268 break; 269 case NINED3DSPTYPE_BOOL: 270 DUMP("%s", param->imm.b ? "TRUE" : "FALSE"); 271 break; 272 default: 273 assert(0); 274 break; 275 } 276} 277 278static void 279sm1_dump_src_param(const struct sm1_src_param *param) 280{ 281 if (param->file == NINED3DSPR_IMMEDIATE) { 282 assert(!param->mod && 283 !param->rel && 284 param->swizzle == NINED3DSP_NOSWIZZLE); 285 sm1_dump_immediate(param); 286 return; 287 } 288 289 if (param->mod) 290 DUMP("%s(", sm1_mod_str[param->mod]); 291 if (param->rel) { 292 DUMP("%c[", sm1_file_char[param->file]); 293 sm1_dump_src_param(param->rel); 294 DUMP("+%i]", param->idx); 295 } else { 296 sm1_dump_reg(param->file, param->idx); 297 } 298 if (param->mod) 299 DUMP(")"); 300 if (param->swizzle != NINED3DSP_NOSWIZZLE) { 301 DUMP("."); 302 sm1_dump_swizzle(param->swizzle); 303 } 304} 305 306static void 307sm1_dump_dst_param(const struct sm1_dst_param *param) 308{ 309 if (param->mod & NINED3DSPDM_SATURATE) 310 DUMP("sat "); 311 if (param->mod & NINED3DSPDM_PARTIALP) 312 DUMP("pp "); 313 if (param->mod & NINED3DSPDM_CENTROID) 314 DUMP("centroid "); 315 if (param->shift < 0) 316 DUMP("/%u ", 1 << -param->shift); 317 if (param->shift > 0) 318 DUMP("*%u ", 1 << param->shift); 319 320 if (param->rel) { 321 DUMP("%c[", sm1_file_char[param->file]); 322 sm1_dump_src_param(param->rel); 323 DUMP("+%i]", param->idx); 324 } else { 325 sm1_dump_reg(param->file, param->idx); 326 } 327 if (param->mask != NINED3DSP_WRITEMASK_ALL) { 328 DUMP("."); 329 sm1_dump_writemask(param->mask); 330 } 331} 332 333struct sm1_semantic 334{ 335 struct sm1_dst_param reg; 336 BYTE sampler_type; 337 D3DDECLUSAGE usage; 338 BYTE usage_idx; 339}; 340 341struct sm1_op_info 342{ 343 /* NOTE: 0 is a valid TGSI opcode, but if handler is set, this parameter 344 * should be ignored completely */ 345 unsigned sio; 346 unsigned opcode; /* TGSI_OPCODE_x */ 347 348 /* versions are still set even handler is set */ 349 struct { 350 unsigned min; 351 unsigned max; 352 } vert_version, frag_version; 353 354 /* number of regs parsed outside of special handler */ 355 unsigned ndst; 356 unsigned nsrc; 357 358 /* some instructions don't map perfectly, so use a special handler */ 359 translate_instruction_func handler; 360}; 361 362struct sm1_instruction 363{ 364 D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode; 365 BYTE flags; 366 BOOL coissue; 367 BOOL predicated; 368 BYTE ndst; 369 BYTE nsrc; 370 struct sm1_src_param src[4]; 371 struct sm1_src_param src_rel[4]; 372 struct sm1_src_param pred; 373 struct sm1_src_param dst_rel[1]; 374 struct sm1_dst_param dst[1]; 375 376 const struct sm1_op_info *info; 377}; 378 379static void 380sm1_dump_instruction(struct sm1_instruction *insn, unsigned indent) 381{ 382 unsigned i; 383 384 /* no info stored for these: */ 385 if (insn->opcode == D3DSIO_DCL) 386 return; 387 for (i = 0; i < indent; ++i) 388 DUMP(" "); 389 390 if (insn->predicated) { 391 DUMP("@"); 392 sm1_dump_src_param(&insn->pred); 393 DUMP(" "); 394 } 395 DUMP("%s", d3dsio_to_string(insn->opcode)); 396 if (insn->flags) { 397 switch (insn->opcode) { 398 case D3DSIO_TEX: 399 DUMP(insn->flags == NINED3DSI_TEXLD_PROJECT ? "p" : "b"); 400 break; 401 default: 402 DUMP("_%x", insn->flags); 403 break; 404 } 405 } 406 if (insn->coissue) 407 DUMP("_co"); 408 DUMP(" "); 409 410 for (i = 0; i < insn->ndst && i < ARRAY_SIZE(insn->dst); ++i) { 411 sm1_dump_dst_param(&insn->dst[i]); 412 DUMP(" "); 413 } 414 415 for (i = 0; i < insn->nsrc && i < ARRAY_SIZE(insn->src); ++i) { 416 sm1_dump_src_param(&insn->src[i]); 417 DUMP(" "); 418 } 419 if (insn->opcode == D3DSIO_DEF || 420 insn->opcode == D3DSIO_DEFI || 421 insn->opcode == D3DSIO_DEFB) 422 sm1_dump_immediate(&insn->src[0]); 423 424 DUMP("\n"); 425} 426 427struct sm1_local_const 428{ 429 INT idx; 430 struct ureg_src reg; 431 float f[4]; /* for indirect addressing of float constants */ 432}; 433 434struct shader_translator 435{ 436 const DWORD *byte_code; 437 const DWORD *parse; 438 const DWORD *parse_next; 439 440 struct ureg_program *ureg; 441 442 /* shader version */ 443 struct { 444 BYTE major; 445 BYTE minor; 446 } version; 447 unsigned processor; /* PIPE_SHADER_VERTEX/FRAMGENT */ 448 unsigned num_constf_allowed; 449 unsigned num_consti_allowed; 450 unsigned num_constb_allowed; 451 452 boolean native_integers; 453 boolean inline_subroutines; 454 boolean want_texcoord; 455 boolean shift_wpos; 456 boolean wpos_is_sysval; 457 boolean face_is_sysval_integer; 458 boolean mul_zero_wins; 459 unsigned texcoord_sn; 460 461 struct sm1_instruction insn; /* current instruction */ 462 463 struct { 464 struct ureg_dst *r; 465 struct ureg_dst oPos; 466 struct ureg_dst oPos_out; /* the real output when doing streamout */ 467 struct ureg_dst oFog; 468 struct ureg_dst oPts; 469 struct ureg_dst oCol[4]; 470 struct ureg_dst o[PIPE_MAX_SHADER_OUTPUTS]; 471 struct ureg_dst oDepth; 472 struct ureg_src v[PIPE_MAX_SHADER_INPUTS]; 473 struct ureg_src v_consecutive; /* copy in temp array of ps inputs for rel addressing */ 474 struct ureg_src vPos; 475 struct ureg_src vFace; 476 struct ureg_src s; 477 struct ureg_dst p; 478 struct ureg_dst address; 479 struct ureg_dst a0; 480 struct ureg_dst predicate; 481 struct ureg_dst predicate_tmp; 482 struct ureg_dst predicate_dst; 483 struct ureg_dst tS[8]; /* texture stage registers */ 484 struct ureg_dst tdst; /* scratch dst if we need extra modifiers */ 485 struct ureg_dst t[8]; /* scratch TEMPs */ 486 struct ureg_src vC[2]; /* PS color in */ 487 struct ureg_src vT[8]; /* PS texcoord in */ 488 struct ureg_dst rL[NINE_MAX_LOOP_DEPTH]; /* loop ctr */ 489 } regs; 490 unsigned num_temp; /* ARRAY_SIZE(regs.r) */ 491 unsigned num_scratch; 492 unsigned loop_depth; 493 unsigned loop_depth_max; 494 unsigned cond_depth; 495 unsigned loop_labels[NINE_MAX_LOOP_DEPTH]; 496 unsigned cond_labels[NINE_MAX_COND_DEPTH]; 497 boolean loop_or_rep[NINE_MAX_LOOP_DEPTH]; /* true: loop, false: rep */ 498 boolean predicated_activated; 499 500 unsigned *inst_labels; /* LABEL op */ 501 unsigned num_inst_labels; 502 503 unsigned sampler_targets[NINE_MAX_SAMPLERS]; /* TGSI_TEXTURE_x */ 504 505 struct sm1_local_const *lconstf; 506 unsigned num_lconstf; 507 struct sm1_local_const *lconsti; 508 unsigned num_lconsti; 509 struct sm1_local_const *lconstb; 510 unsigned num_lconstb; 511 512 boolean slots_used[NINE_MAX_CONST_ALL]; 513 unsigned *slot_map; 514 unsigned num_slots; 515 516 boolean indirect_const_access; 517 boolean failure; 518 519 struct nine_vs_output_info output_info[16]; 520 int num_outputs; 521 522 struct nine_shader_info *info; 523 524 int16_t op_info_map[D3DSIO_BREAKP + 1]; 525}; 526 527#define IS_VS (tx->processor == PIPE_SHADER_VERTEX) 528#define IS_PS (tx->processor == PIPE_SHADER_FRAGMENT) 529 530#define FAILURE_VOID(cond) if ((cond)) {tx->failure=1;return;} 531 532static void 533sm1_read_semantic(struct shader_translator *, struct sm1_semantic *); 534 535static void 536sm1_instruction_check(const struct sm1_instruction *insn) 537{ 538 if (insn->opcode == D3DSIO_CRS) 539 { 540 if (insn->dst[0].mask & NINED3DSP_WRITEMASK_3) 541 { 542 DBG("CRS.mask.w\n"); 543 } 544 } 545} 546 547static void 548nine_record_outputs(struct shader_translator *tx, BYTE Usage, BYTE UsageIndex, 549 int mask, int output_index) 550{ 551 tx->output_info[tx->num_outputs].output_semantic = Usage; 552 tx->output_info[tx->num_outputs].output_semantic_index = UsageIndex; 553 tx->output_info[tx->num_outputs].mask = mask; 554 tx->output_info[tx->num_outputs].output_index = output_index; 555 tx->num_outputs++; 556} 557 558static struct ureg_src nine_float_constant_src(struct shader_translator *tx, int idx) 559{ 560 struct ureg_src src; 561 562 if (tx->slot_map) 563 idx = tx->slot_map[idx]; 564 /* vswp constant handling: we use two buffers 565 * to fit all the float constants. The special handling 566 * doesn't need to be elsewhere, because all the instructions 567 * accessing the constants directly are VS1, and swvp 568 * is VS >= 2 */ 569 if (tx->info->swvp_on && idx >= 4096) { 570 /* TODO: swvp rel is broken if many constants are used */ 571 src = ureg_src_register(TGSI_FILE_CONSTANT, idx - 4096); 572 src = ureg_src_dimension(src, 1); 573 } else { 574 src = ureg_src_register(TGSI_FILE_CONSTANT, idx); 575 src = ureg_src_dimension(src, 0); 576 } 577 578 if (!tx->info->swvp_on) 579 tx->slots_used[idx] = TRUE; 580 if (tx->info->const_float_slots < (idx + 1)) 581 tx->info->const_float_slots = idx + 1; 582 if (tx->num_slots < (idx + 1)) 583 tx->num_slots = idx + 1; 584 585 return src; 586} 587 588static struct ureg_src nine_integer_constant_src(struct shader_translator *tx, int idx) 589{ 590 struct ureg_src src; 591 592 if (tx->info->swvp_on) { 593 src = ureg_src_register(TGSI_FILE_CONSTANT, idx); 594 src = ureg_src_dimension(src, 2); 595 } else { 596 unsigned slot_idx = tx->info->const_i_base + idx; 597 if (tx->slot_map) 598 slot_idx = tx->slot_map[slot_idx]; 599 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx); 600 src = ureg_src_dimension(src, 0); 601 tx->slots_used[slot_idx] = TRUE; 602 tx->info->int_slots_used[idx] = TRUE; 603 if (tx->num_slots < (slot_idx + 1)) 604 tx->num_slots = slot_idx + 1; 605 } 606 607 if (tx->info->const_int_slots < (idx + 1)) 608 tx->info->const_int_slots = idx + 1; 609 610 return src; 611} 612 613static struct ureg_src nine_boolean_constant_src(struct shader_translator *tx, int idx) 614{ 615 struct ureg_src src; 616 617 char r = idx / 4; 618 char s = idx & 3; 619 620 if (tx->info->swvp_on) { 621 src = ureg_src_register(TGSI_FILE_CONSTANT, r); 622 src = ureg_src_dimension(src, 3); 623 } else { 624 unsigned slot_idx = tx->info->const_b_base + r; 625 if (tx->slot_map) 626 slot_idx = tx->slot_map[slot_idx]; 627 src = ureg_src_register(TGSI_FILE_CONSTANT, slot_idx); 628 src = ureg_src_dimension(src, 0); 629 tx->slots_used[slot_idx] = TRUE; 630 tx->info->bool_slots_used[idx] = TRUE; 631 if (tx->num_slots < (slot_idx + 1)) 632 tx->num_slots = slot_idx + 1; 633 } 634 src = ureg_swizzle(src, s, s, s, s); 635 636 if (tx->info->const_bool_slots < (idx + 1)) 637 tx->info->const_bool_slots = idx + 1; 638 639 return src; 640} 641 642static boolean 643tx_lconstf(struct shader_translator *tx, struct ureg_src *src, INT index) 644{ 645 INT i; 646 647 if (index < 0 || index >= tx->num_constf_allowed) { 648 tx->failure = TRUE; 649 return FALSE; 650 } 651 for (i = 0; i < tx->num_lconstf; ++i) { 652 if (tx->lconstf[i].idx == index) { 653 *src = tx->lconstf[i].reg; 654 return TRUE; 655 } 656 } 657 return FALSE; 658} 659static boolean 660tx_lconsti(struct shader_translator *tx, struct ureg_src *src, INT index) 661{ 662 int i; 663 664 if (index < 0 || index >= tx->num_consti_allowed) { 665 tx->failure = TRUE; 666 return FALSE; 667 } 668 for (i = 0; i < tx->num_lconsti; ++i) { 669 if (tx->lconsti[i].idx == index) { 670 *src = tx->lconsti[i].reg; 671 return TRUE; 672 } 673 } 674 return FALSE; 675} 676static boolean 677tx_lconstb(struct shader_translator *tx, struct ureg_src *src, INT index) 678{ 679 int i; 680 681 if (index < 0 || index >= tx->num_constb_allowed) { 682 tx->failure = TRUE; 683 return FALSE; 684 } 685 for (i = 0; i < tx->num_lconstb; ++i) { 686 if (tx->lconstb[i].idx == index) { 687 *src = tx->lconstb[i].reg; 688 return TRUE; 689 } 690 } 691 return FALSE; 692} 693 694static void 695tx_set_lconstf(struct shader_translator *tx, INT index, float f[4]) 696{ 697 unsigned n; 698 699 FAILURE_VOID(index < 0 || index >= tx->num_constf_allowed) 700 701 for (n = 0; n < tx->num_lconstf; ++n) 702 if (tx->lconstf[n].idx == index) 703 break; 704 if (n == tx->num_lconstf) { 705 if ((n % 8) == 0) { 706 tx->lconstf = REALLOC(tx->lconstf, 707 (n + 0) * sizeof(tx->lconstf[0]), 708 (n + 8) * sizeof(tx->lconstf[0])); 709 assert(tx->lconstf); 710 } 711 tx->num_lconstf++; 712 } 713 tx->lconstf[n].idx = index; 714 tx->lconstf[n].reg = ureg_imm4f(tx->ureg, f[0], f[1], f[2], f[3]); 715 716 memcpy(tx->lconstf[n].f, f, sizeof(tx->lconstf[n].f)); 717} 718static void 719tx_set_lconsti(struct shader_translator *tx, INT index, int i[4]) 720{ 721 unsigned n; 722 723 FAILURE_VOID(index < 0 || index >= tx->num_consti_allowed) 724 725 for (n = 0; n < tx->num_lconsti; ++n) 726 if (tx->lconsti[n].idx == index) 727 break; 728 if (n == tx->num_lconsti) { 729 if ((n % 8) == 0) { 730 tx->lconsti = REALLOC(tx->lconsti, 731 (n + 0) * sizeof(tx->lconsti[0]), 732 (n + 8) * sizeof(tx->lconsti[0])); 733 assert(tx->lconsti); 734 } 735 tx->num_lconsti++; 736 } 737 738 tx->lconsti[n].idx = index; 739 tx->lconsti[n].reg = tx->native_integers ? 740 ureg_imm4i(tx->ureg, i[0], i[1], i[2], i[3]) : 741 ureg_imm4f(tx->ureg, i[0], i[1], i[2], i[3]); 742} 743static void 744tx_set_lconstb(struct shader_translator *tx, INT index, BOOL b) 745{ 746 unsigned n; 747 748 FAILURE_VOID(index < 0 || index >= tx->num_constb_allowed) 749 750 for (n = 0; n < tx->num_lconstb; ++n) 751 if (tx->lconstb[n].idx == index) 752 break; 753 if (n == tx->num_lconstb) { 754 if ((n % 8) == 0) { 755 tx->lconstb = REALLOC(tx->lconstb, 756 (n + 0) * sizeof(tx->lconstb[0]), 757 (n + 8) * sizeof(tx->lconstb[0])); 758 assert(tx->lconstb); 759 } 760 tx->num_lconstb++; 761 } 762 763 tx->lconstb[n].idx = index; 764 tx->lconstb[n].reg = tx->native_integers ? 765 ureg_imm1u(tx->ureg, b ? 0xffffffff : 0) : 766 ureg_imm1f(tx->ureg, b ? 1.0f : 0.0f); 767} 768 769static inline struct ureg_dst 770tx_scratch(struct shader_translator *tx) 771{ 772 if (tx->num_scratch >= ARRAY_SIZE(tx->regs.t)) { 773 tx->failure = TRUE; 774 return tx->regs.t[0]; 775 } 776 if (ureg_dst_is_undef(tx->regs.t[tx->num_scratch])) 777 tx->regs.t[tx->num_scratch] = ureg_DECL_local_temporary(tx->ureg); 778 return tx->regs.t[tx->num_scratch++]; 779} 780 781static inline struct ureg_dst 782tx_scratch_scalar(struct shader_translator *tx) 783{ 784 return ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 785} 786 787static inline struct ureg_src 788tx_src_scalar(struct ureg_dst dst) 789{ 790 struct ureg_src src = ureg_src(dst); 791 int c = ffs(dst.WriteMask) - 1; 792 if (dst.WriteMask == (1 << c)) 793 src = ureg_scalar(src, c); 794 return src; 795} 796 797static inline void 798tx_temp_alloc(struct shader_translator *tx, INT idx) 799{ 800 assert(idx >= 0); 801 if (idx >= tx->num_temp) { 802 unsigned k = tx->num_temp; 803 unsigned n = idx + 1; 804 tx->regs.r = REALLOC(tx->regs.r, 805 k * sizeof(tx->regs.r[0]), 806 n * sizeof(tx->regs.r[0])); 807 for (; k < n; ++k) 808 tx->regs.r[k] = ureg_dst_undef(); 809 tx->num_temp = n; 810 } 811 if (ureg_dst_is_undef(tx->regs.r[idx])) 812 tx->regs.r[idx] = ureg_DECL_temporary(tx->ureg); 813} 814 815static inline void 816tx_addr_alloc(struct shader_translator *tx, INT idx) 817{ 818 assert(idx == 0); 819 if (ureg_dst_is_undef(tx->regs.address)) 820 tx->regs.address = ureg_DECL_address(tx->ureg); 821 if (ureg_dst_is_undef(tx->regs.a0)) 822 tx->regs.a0 = ureg_DECL_temporary(tx->ureg); 823} 824 825static inline bool 826TEX_if_fetch4(struct shader_translator *tx, struct ureg_dst dst, 827 unsigned target, struct ureg_src src0, 828 struct ureg_src src1, INT idx) 829{ 830 struct ureg_dst tmp; 831 struct ureg_src src_tg4[3] = {src0, ureg_imm1f(tx->ureg, 0.f), src1}; 832 833 if (!(tx->info->fetch4 & (1 << idx))) 834 return false; 835 836 /* TODO: needs more tests, but this feature is not much used at all */ 837 838 tmp = tx_scratch(tx); 839 ureg_tex_insn(tx->ureg, TGSI_OPCODE_TG4, &tmp, 1, target, TGSI_RETURN_TYPE_FLOAT, 840 NULL, 0, src_tg4, 3); 841 ureg_MOV(tx->ureg, dst, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z, X, Y, W))); 842 return true; 843} 844 845/* NOTE: It's not very clear on which ps1.1-ps1.3 instructions 846 * the projection should be applied on the texture. It doesn't 847 * apply on texkill. 848 * The doc is very imprecise here (it says the projection is done 849 * before rasterization, thus in vs, which seems wrong since ps instructions 850 * are affected differently) 851 * For now we only apply to the ps TEX instruction and TEXBEM. 852 * Perhaps some other instructions would need it */ 853static inline void 854apply_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, 855 struct ureg_src src, INT idx) 856{ 857 struct ureg_dst tmp; 858 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); 859 860 /* no projection */ 861 if (dim == 1) { 862 ureg_MOV(tx->ureg, dst, src); 863 } else { 864 tmp = tx_scratch_scalar(tx); 865 ureg_RCP(tx->ureg, tmp, ureg_scalar(src, dim-1)); 866 ureg_MUL(tx->ureg, dst, tx_src_scalar(tmp), src); 867 } 868} 869 870static inline void 871TEX_with_ps1x_projection(struct shader_translator *tx, struct ureg_dst dst, 872 unsigned target, struct ureg_src src0, 873 struct ureg_src src1, INT idx) 874{ 875 unsigned dim = 1 + ((tx->info->projected >> (2 * idx)) & 3); 876 struct ureg_dst tmp; 877 boolean shadow = !!(tx->info->sampler_mask_shadow & (1 << idx)); 878 879 /* dim == 1: no projection 880 * Looks like must be disabled when it makes no 881 * sense according the texture dimensions 882 */ 883 if (dim == 1 || (dim <= target && !shadow)) { 884 ureg_TEX(tx->ureg, dst, target, src0, src1); 885 } else if (dim == 4) { 886 ureg_TXP(tx->ureg, dst, target, src0, src1); 887 } else { 888 tmp = tx_scratch(tx); 889 apply_ps1x_projection(tx, tmp, src0, idx); 890 ureg_TEX(tx->ureg, dst, target, ureg_src(tmp), src1); 891 } 892} 893 894static inline void 895tx_texcoord_alloc(struct shader_translator *tx, INT idx) 896{ 897 assert(IS_PS); 898 assert(idx >= 0 && idx < ARRAY_SIZE(tx->regs.vT)); 899 if (ureg_src_is_undef(tx->regs.vT[idx])) 900 tx->regs.vT[idx] = ureg_DECL_fs_input(tx->ureg, tx->texcoord_sn, idx, 901 TGSI_INTERPOLATE_PERSPECTIVE); 902} 903 904static inline unsigned * 905tx_bgnloop(struct shader_translator *tx) 906{ 907 tx->loop_depth++; 908 if (tx->loop_depth_max < tx->loop_depth) 909 tx->loop_depth_max = tx->loop_depth; 910 assert(tx->loop_depth < NINE_MAX_LOOP_DEPTH); 911 return &tx->loop_labels[tx->loop_depth - 1]; 912} 913 914static inline unsigned * 915tx_endloop(struct shader_translator *tx) 916{ 917 assert(tx->loop_depth); 918 tx->loop_depth--; 919 ureg_fixup_label(tx->ureg, tx->loop_labels[tx->loop_depth], 920 ureg_get_instruction_number(tx->ureg)); 921 return &tx->loop_labels[tx->loop_depth]; 922} 923 924static struct ureg_dst 925tx_get_loopctr(struct shader_translator *tx, boolean loop_or_rep) 926{ 927 const unsigned l = tx->loop_depth - 1; 928 929 if (!tx->loop_depth) 930 { 931 DBG("loop counter requested outside of loop\n"); 932 return ureg_dst_undef(); 933 } 934 935 if (ureg_dst_is_undef(tx->regs.rL[l])) { 936 /* loop or rep ctr creation */ 937 tx->regs.rL[l] = ureg_DECL_local_temporary(tx->ureg); 938 tx->loop_or_rep[l] = loop_or_rep; 939 } 940 /* loop - rep - endloop - endrep not allowed */ 941 assert(tx->loop_or_rep[l] == loop_or_rep); 942 943 return tx->regs.rL[l]; 944} 945 946static struct ureg_src 947tx_get_loopal(struct shader_translator *tx) 948{ 949 int loop_level = tx->loop_depth - 1; 950 951 while (loop_level >= 0) { 952 /* handle loop - rep - endrep - endloop case */ 953 if (tx->loop_or_rep[loop_level]) 954 /* the value is in the loop counter y component (nine implementation) */ 955 return ureg_scalar(ureg_src(tx->regs.rL[loop_level]), TGSI_SWIZZLE_Y); 956 loop_level--; 957 } 958 959 DBG("aL counter requested outside of loop\n"); 960 return ureg_src_undef(); 961} 962 963static inline unsigned * 964tx_cond(struct shader_translator *tx) 965{ 966 assert(tx->cond_depth <= NINE_MAX_COND_DEPTH); 967 tx->cond_depth++; 968 return &tx->cond_labels[tx->cond_depth - 1]; 969} 970 971static inline unsigned * 972tx_elsecond(struct shader_translator *tx) 973{ 974 assert(tx->cond_depth); 975 return &tx->cond_labels[tx->cond_depth - 1]; 976} 977 978static inline void 979tx_endcond(struct shader_translator *tx) 980{ 981 assert(tx->cond_depth); 982 tx->cond_depth--; 983 ureg_fixup_label(tx->ureg, tx->cond_labels[tx->cond_depth], 984 ureg_get_instruction_number(tx->ureg)); 985} 986 987static inline struct ureg_dst 988nine_ureg_dst_register(unsigned file, int index) 989{ 990 return ureg_dst(ureg_src_register(file, index)); 991} 992 993static inline struct ureg_src 994nine_get_position_input(struct shader_translator *tx) 995{ 996 struct ureg_program *ureg = tx->ureg; 997 998 if (tx->wpos_is_sysval) 999 return ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 1000 else 1001 return ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 1002 0, TGSI_INTERPOLATE_LINEAR); 1003} 1004 1005static struct ureg_src 1006tx_src_param(struct shader_translator *tx, const struct sm1_src_param *param) 1007{ 1008 struct ureg_program *ureg = tx->ureg; 1009 struct ureg_src src; 1010 struct ureg_dst tmp; 1011 1012 assert(!param->rel || (IS_VS && param->file == D3DSPR_CONST) || 1013 (param->file == D3DSPR_INPUT && tx->version.major == 3)); 1014 1015 switch (param->file) 1016 { 1017 case D3DSPR_TEMP: 1018 tx_temp_alloc(tx, param->idx); 1019 src = ureg_src(tx->regs.r[param->idx]); 1020 break; 1021 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ 1022 case D3DSPR_ADDR: 1023 if (IS_VS) { 1024 assert(param->idx == 0); 1025 /* the address register (vs only) must be 1026 * assigned before use */ 1027 assert(!ureg_dst_is_undef(tx->regs.a0)); 1028 /* Round to lowest for vs1.1 (contrary to the doc), else 1029 * round to nearest */ 1030 if (tx->version.major < 2 && tx->version.minor < 2) 1031 ureg_ARL(ureg, tx->regs.address, ureg_src(tx->regs.a0)); 1032 else 1033 ureg_ARR(ureg, tx->regs.address, ureg_src(tx->regs.a0)); 1034 src = ureg_src(tx->regs.address); 1035 } else { 1036 if (tx->version.major < 2 && tx->version.minor < 4) { 1037 /* no subroutines, so should be defined */ 1038 src = ureg_src(tx->regs.tS[param->idx]); 1039 } else { 1040 tx_texcoord_alloc(tx, param->idx); 1041 src = tx->regs.vT[param->idx]; 1042 } 1043 } 1044 break; 1045 case D3DSPR_INPUT: 1046 if (IS_VS) { 1047 src = ureg_src_register(TGSI_FILE_INPUT, param->idx); 1048 } else { 1049 if (tx->version.major < 3) { 1050 src = ureg_DECL_fs_input_centroid( 1051 ureg, TGSI_SEMANTIC_COLOR, param->idx, 1052 TGSI_INTERPOLATE_COLOR, 1053 tx->info->force_color_in_centroid ? 1054 TGSI_INTERPOLATE_LOC_CENTROID : 0, 1055 0, 1); 1056 } else { 1057 if(param->rel) { 1058 /* Copy all inputs (non consecutive) 1059 * to temp array (consecutive). 1060 * This is not good for performance. 1061 * A better way would be to have inputs 1062 * consecutive (would need implement alternative 1063 * way to match vs outputs and ps inputs). 1064 * However even with the better way, the temp array 1065 * copy would need to be used if some inputs 1066 * are not GENERIC or if they have different 1067 * interpolation flag. */ 1068 if (ureg_src_is_undef(tx->regs.v_consecutive)) { 1069 int i; 1070 tx->regs.v_consecutive = ureg_src(ureg_DECL_array_temporary(ureg, 10, 0)); 1071 for (i = 0; i < 10; i++) { 1072 if (!ureg_src_is_undef(tx->regs.v[i])) 1073 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), tx->regs.v[i]); 1074 else 1075 ureg_MOV(ureg, ureg_dst_array_offset(ureg_dst(tx->regs.v_consecutive), i), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 1076 } 1077 } 1078 src = ureg_src_array_offset(tx->regs.v_consecutive, param->idx); 1079 } else { 1080 assert(param->idx < ARRAY_SIZE(tx->regs.v)); 1081 src = tx->regs.v[param->idx]; 1082 } 1083 } 1084 } 1085 if (param->rel) 1086 src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 1087 break; 1088 case D3DSPR_PREDICATE: 1089 if (ureg_dst_is_undef(tx->regs.predicate)) { 1090 /* Forbidden to use the predicate register before being set */ 1091 tx->failure = TRUE; 1092 tx->regs.predicate = ureg_DECL_temporary(tx->ureg); 1093 } 1094 src = ureg_src(tx->regs.predicate); 1095 break; 1096 case D3DSPR_SAMPLER: 1097 assert(param->mod == NINED3DSPSM_NONE); 1098 /* assert(param->swizzle == NINED3DSP_NOSWIZZLE); Passed by wine tests */ 1099 src = ureg_DECL_sampler(ureg, param->idx); 1100 break; 1101 case D3DSPR_CONST: 1102 if (param->rel || !tx_lconstf(tx, &src, param->idx)) { 1103 src = nine_float_constant_src(tx, param->idx); 1104 if (param->rel) { 1105 tx->indirect_const_access = TRUE; 1106 src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 1107 } 1108 } 1109 if (!IS_VS && tx->version.major < 2) { 1110 /* ps 1.X clamps constants */ 1111 tmp = tx_scratch(tx); 1112 ureg_MIN(ureg, tmp, src, ureg_imm1f(ureg, 1.0f)); 1113 ureg_MAX(ureg, tmp, ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); 1114 src = ureg_src(tmp); 1115 } 1116 break; 1117 case D3DSPR_CONST2: 1118 case D3DSPR_CONST3: 1119 case D3DSPR_CONST4: 1120 DBG("CONST2/3/4 should have been collapsed into D3DSPR_CONST !\n"); 1121 assert(!"CONST2/3/4"); 1122 src = ureg_imm1f(ureg, 0.0f); 1123 break; 1124 case D3DSPR_CONSTINT: 1125 /* relative adressing only possible for float constants in vs */ 1126 if (!tx_lconsti(tx, &src, param->idx)) 1127 src = nine_integer_constant_src(tx, param->idx); 1128 break; 1129 case D3DSPR_CONSTBOOL: 1130 if (!tx_lconstb(tx, &src, param->idx)) 1131 src = nine_boolean_constant_src(tx, param->idx); 1132 break; 1133 case D3DSPR_LOOP: 1134 if (ureg_dst_is_undef(tx->regs.address)) 1135 tx->regs.address = ureg_DECL_address(ureg); 1136 if (!tx->native_integers) 1137 ureg_ARR(ureg, tx->regs.address, tx_get_loopal(tx)); 1138 else 1139 ureg_UARL(ureg, tx->regs.address, tx_get_loopal(tx)); 1140 src = ureg_src(tx->regs.address); 1141 break; 1142 case D3DSPR_MISCTYPE: 1143 switch (param->idx) { 1144 case D3DSMO_POSITION: 1145 if (ureg_src_is_undef(tx->regs.vPos)) 1146 tx->regs.vPos = nine_get_position_input(tx); 1147 if (tx->shift_wpos) { 1148 /* TODO: do this only once */ 1149 struct ureg_dst wpos = tx_scratch(tx); 1150 ureg_ADD(ureg, wpos, tx->regs.vPos, 1151 ureg_imm4f(ureg, -0.5f, -0.5f, 0.0f, 0.0f)); 1152 src = ureg_src(wpos); 1153 } else { 1154 src = tx->regs.vPos; 1155 } 1156 break; 1157 case D3DSMO_FACE: 1158 if (ureg_src_is_undef(tx->regs.vFace)) { 1159 if (tx->face_is_sysval_integer) { 1160 tmp = ureg_DECL_temporary(ureg); 1161 tx->regs.vFace = 1162 ureg_DECL_system_value(ureg, TGSI_SEMANTIC_FACE, 0); 1163 1164 /* convert bool to float */ 1165 ureg_UCMP(ureg, tmp, ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X), 1166 ureg_imm1f(ureg, 1), ureg_imm1f(ureg, -1)); 1167 tx->regs.vFace = ureg_src(tmp); 1168 } else { 1169 tx->regs.vFace = ureg_DECL_fs_input(ureg, 1170 TGSI_SEMANTIC_FACE, 0, 1171 TGSI_INTERPOLATE_CONSTANT); 1172 } 1173 tx->regs.vFace = ureg_scalar(tx->regs.vFace, TGSI_SWIZZLE_X); 1174 } 1175 src = tx->regs.vFace; 1176 break; 1177 default: 1178 assert(!"invalid src D3DSMO"); 1179 break; 1180 } 1181 break; 1182 case D3DSPR_TEMPFLOAT16: 1183 break; 1184 default: 1185 assert(!"invalid src D3DSPR"); 1186 } 1187 1188 switch (param->mod) { 1189 case NINED3DSPSM_DW: 1190 tmp = tx_scratch(tx); 1191 /* NOTE: app is not allowed to read w with this modifier */ 1192 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_3), ureg_scalar(src, TGSI_SWIZZLE_W)); 1193 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(W,W,W,W))); 1194 src = ureg_src(tmp); 1195 break; 1196 case NINED3DSPSM_DZ: 1197 tmp = tx_scratch(tx); 1198 /* NOTE: app is not allowed to read z with this modifier */ 1199 ureg_RCP(ureg, ureg_writemask(tmp, NINED3DSP_WRITEMASK_2), ureg_scalar(src, TGSI_SWIZZLE_Z)); 1200 ureg_MUL(ureg, tmp, src, ureg_swizzle(ureg_src(tmp), NINE_SWIZZLE4(Z,Z,Z,Z))); 1201 src = ureg_src(tmp); 1202 break; 1203 default: 1204 break; 1205 } 1206 1207 if (param->swizzle != NINED3DSP_NOSWIZZLE && param->file != D3DSPR_SAMPLER) 1208 src = ureg_swizzle(src, 1209 (param->swizzle >> 0) & 0x3, 1210 (param->swizzle >> 2) & 0x3, 1211 (param->swizzle >> 4) & 0x3, 1212 (param->swizzle >> 6) & 0x3); 1213 1214 switch (param->mod) { 1215 case NINED3DSPSM_ABS: 1216 src = ureg_abs(src); 1217 break; 1218 case NINED3DSPSM_ABSNEG: 1219 src = ureg_negate(ureg_abs(src)); 1220 break; 1221 case NINED3DSPSM_NEG: 1222 src = ureg_negate(src); 1223 break; 1224 case NINED3DSPSM_BIAS: 1225 tmp = tx_scratch(tx); 1226 ureg_ADD(ureg, tmp, src, ureg_imm1f(ureg, -0.5f)); 1227 src = ureg_src(tmp); 1228 break; 1229 case NINED3DSPSM_BIASNEG: 1230 tmp = tx_scratch(tx); 1231 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 0.5f), ureg_negate(src)); 1232 src = ureg_src(tmp); 1233 break; 1234 case NINED3DSPSM_NOT: 1235 if (tx->native_integers && param->file == D3DSPR_CONSTBOOL) { 1236 tmp = tx_scratch(tx); 1237 ureg_NOT(ureg, tmp, src); 1238 src = ureg_src(tmp); 1239 break; 1240 } else { /* predicate */ 1241 tmp = tx_scratch(tx); 1242 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src)); 1243 src = ureg_src(tmp); 1244 } 1245 FALLTHROUGH; 1246 case NINED3DSPSM_COMP: 1247 tmp = tx_scratch(tx); 1248 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(src)); 1249 src = ureg_src(tmp); 1250 break; 1251 case NINED3DSPSM_DZ: 1252 case NINED3DSPSM_DW: 1253 /* Already handled*/ 1254 break; 1255 case NINED3DSPSM_SIGN: 1256 tmp = tx_scratch(tx); 1257 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); 1258 src = ureg_src(tmp); 1259 break; 1260 case NINED3DSPSM_SIGNNEG: 1261 tmp = tx_scratch(tx); 1262 ureg_MAD(ureg, tmp, src, ureg_imm1f(ureg, -2.0f), ureg_imm1f(ureg, 1.0f)); 1263 src = ureg_src(tmp); 1264 break; 1265 case NINED3DSPSM_X2: 1266 tmp = tx_scratch(tx); 1267 ureg_ADD(ureg, tmp, src, src); 1268 src = ureg_src(tmp); 1269 break; 1270 case NINED3DSPSM_X2NEG: 1271 tmp = tx_scratch(tx); 1272 ureg_ADD(ureg, tmp, src, src); 1273 src = ureg_negate(ureg_src(tmp)); 1274 break; 1275 default: 1276 assert(param->mod == NINED3DSPSM_NONE); 1277 break; 1278 } 1279 1280 return src; 1281} 1282 1283static struct ureg_dst 1284_tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) 1285{ 1286 struct ureg_dst dst; 1287 1288 switch (param->file) 1289 { 1290 case D3DSPR_TEMP: 1291 assert(!param->rel); 1292 tx_temp_alloc(tx, param->idx); 1293 dst = tx->regs.r[param->idx]; 1294 break; 1295 /* case D3DSPR_TEXTURE: == D3DSPR_ADDR */ 1296 case D3DSPR_ADDR: 1297 assert(!param->rel); 1298 if (tx->version.major < 2 && !IS_VS) { 1299 if (ureg_dst_is_undef(tx->regs.tS[param->idx])) 1300 tx->regs.tS[param->idx] = ureg_DECL_temporary(tx->ureg); 1301 dst = tx->regs.tS[param->idx]; 1302 } else 1303 if (!IS_VS && tx->insn.opcode == D3DSIO_TEXKILL) { /* maybe others, too */ 1304 tx_texcoord_alloc(tx, param->idx); 1305 dst = ureg_dst(tx->regs.vT[param->idx]); 1306 } else { 1307 tx_addr_alloc(tx, param->idx); 1308 dst = tx->regs.a0; 1309 } 1310 break; 1311 case D3DSPR_RASTOUT: 1312 assert(!param->rel); 1313 switch (param->idx) { 1314 case 0: 1315 if (ureg_dst_is_undef(tx->regs.oPos)) 1316 tx->regs.oPos = 1317 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); 1318 dst = tx->regs.oPos; 1319 break; 1320 case 1: 1321 if (ureg_dst_is_undef(tx->regs.oFog)) 1322 tx->regs.oFog = 1323 ureg_saturate(ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16)); 1324 dst = tx->regs.oFog; 1325 break; 1326 case 2: 1327 if (ureg_dst_is_undef(tx->regs.oPts)) 1328 tx->regs.oPts = ureg_DECL_temporary(tx->ureg); 1329 dst = tx->regs.oPts; 1330 break; 1331 default: 1332 assert(0); 1333 break; 1334 } 1335 break; 1336 /* case D3DSPR_TEXCRDOUT: == D3DSPR_OUTPUT */ 1337 case D3DSPR_OUTPUT: 1338 if (tx->version.major < 3) { 1339 assert(!param->rel); 1340 dst = ureg_DECL_output(tx->ureg, tx->texcoord_sn, param->idx); 1341 } else { 1342 assert(!param->rel); /* TODO */ 1343 assert(param->idx < ARRAY_SIZE(tx->regs.o)); 1344 dst = tx->regs.o[param->idx]; 1345 } 1346 break; 1347 case D3DSPR_ATTROUT: /* VS */ 1348 case D3DSPR_COLOROUT: /* PS */ 1349 assert(param->idx >= 0 && param->idx < 4); 1350 assert(!param->rel); 1351 tx->info->rt_mask |= 1 << param->idx; 1352 if (ureg_dst_is_undef(tx->regs.oCol[param->idx])) { 1353 /* ps < 3: oCol[0] will have fog blending afterward */ 1354 if (!IS_VS && tx->version.major < 3 && param->idx == 0) { 1355 tx->regs.oCol[0] = ureg_DECL_temporary(tx->ureg); 1356 } else { 1357 tx->regs.oCol[param->idx] = 1358 ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_COLOR, param->idx); 1359 } 1360 } 1361 dst = tx->regs.oCol[param->idx]; 1362 if (IS_VS && tx->version.major < 3) 1363 dst = ureg_saturate(dst); 1364 break; 1365 case D3DSPR_DEPTHOUT: 1366 assert(!param->rel); 1367 if (ureg_dst_is_undef(tx->regs.oDepth)) 1368 tx->regs.oDepth = 1369 ureg_DECL_output_masked(tx->ureg, TGSI_SEMANTIC_POSITION, 0, 1370 TGSI_WRITEMASK_Z, 0, 1); 1371 dst = tx->regs.oDepth; /* XXX: must write .z component */ 1372 break; 1373 case D3DSPR_PREDICATE: 1374 if (ureg_dst_is_undef(tx->regs.predicate)) 1375 tx->regs.predicate = ureg_DECL_temporary(tx->ureg); 1376 dst = tx->regs.predicate; 1377 break; 1378 case D3DSPR_TEMPFLOAT16: 1379 DBG("unhandled D3DSPR: %u\n", param->file); 1380 break; 1381 default: 1382 assert(!"invalid dst D3DSPR"); 1383 break; 1384 } 1385 if (param->rel) 1386 dst = ureg_dst_indirect(dst, tx_src_param(tx, param->rel)); 1387 1388 if (param->mask != NINED3DSP_WRITEMASK_ALL) 1389 dst = ureg_writemask(dst, param->mask); 1390 if (param->mod & NINED3DSPDM_SATURATE) 1391 dst = ureg_saturate(dst); 1392 1393 if (tx->predicated_activated) { 1394 tx->regs.predicate_dst = dst; 1395 dst = tx->regs.predicate_tmp; 1396 } 1397 1398 return dst; 1399} 1400 1401static struct ureg_dst 1402tx_dst_param(struct shader_translator *tx, const struct sm1_dst_param *param) 1403{ 1404 if (param->shift) { 1405 tx->regs.tdst = ureg_writemask(tx_scratch(tx), param->mask); 1406 return tx->regs.tdst; 1407 } 1408 return _tx_dst_param(tx, param); 1409} 1410 1411static void 1412tx_apply_dst0_modifiers(struct shader_translator *tx) 1413{ 1414 struct ureg_dst rdst; 1415 float f; 1416 1417 if (!tx->insn.ndst || !tx->insn.dst[0].shift || tx->insn.opcode == D3DSIO_TEXKILL) 1418 return; 1419 rdst = _tx_dst_param(tx, &tx->insn.dst[0]); 1420 1421 assert(rdst.File != TGSI_FILE_ADDRESS); /* this probably isn't possible */ 1422 1423 if (tx->insn.dst[0].shift < 0) 1424 f = 1.0f / (1 << -tx->insn.dst[0].shift); 1425 else 1426 f = 1 << tx->insn.dst[0].shift; 1427 1428 ureg_MUL(tx->ureg, rdst, ureg_src(tx->regs.tdst), ureg_imm1f(tx->ureg, f)); 1429} 1430 1431static struct ureg_src 1432tx_dst_param_as_src(struct shader_translator *tx, const struct sm1_dst_param *param) 1433{ 1434 struct ureg_src src; 1435 1436 assert(!param->shift); 1437 assert(!(param->mod & NINED3DSPDM_SATURATE)); 1438 1439 switch (param->file) { 1440 case D3DSPR_INPUT: 1441 if (IS_VS) { 1442 src = ureg_src_register(TGSI_FILE_INPUT, param->idx); 1443 } else { 1444 assert(!param->rel); 1445 assert(param->idx < ARRAY_SIZE(tx->regs.v)); 1446 src = tx->regs.v[param->idx]; 1447 } 1448 break; 1449 default: 1450 src = ureg_src(tx_dst_param(tx, param)); 1451 break; 1452 } 1453 if (param->rel) 1454 src = ureg_src_indirect(src, tx_src_param(tx, param->rel)); 1455 1456 if (!param->mask) 1457 WARN("mask is 0, using identity swizzle\n"); 1458 1459 if (param->mask && param->mask != NINED3DSP_WRITEMASK_ALL) { 1460 char s[4]; 1461 int n; 1462 int c; 1463 for (n = 0, c = 0; c < 4; ++c) 1464 if (param->mask & (1 << c)) 1465 s[n++] = c; 1466 assert(n); 1467 for (c = n; c < 4; ++c) 1468 s[c] = s[n - 1]; 1469 src = ureg_swizzle(src, s[0], s[1], s[2], s[3]); 1470 } 1471 return src; 1472} 1473 1474static HRESULT 1475NineTranslateInstruction_Mkxn(struct shader_translator *tx, const unsigned k, const unsigned n) 1476{ 1477 struct ureg_program *ureg = tx->ureg; 1478 struct ureg_dst dst; 1479 struct ureg_src src[2]; 1480 struct sm1_src_param *src_mat = &tx->insn.src[1]; 1481 unsigned i; 1482 1483 dst = tx_dst_param(tx, &tx->insn.dst[0]); 1484 src[0] = tx_src_param(tx, &tx->insn.src[0]); 1485 1486 for (i = 0; i < n; i++) 1487 { 1488 const unsigned m = (1 << i); 1489 1490 src[1] = tx_src_param(tx, src_mat); 1491 src_mat->idx++; 1492 1493 if (!(dst.WriteMask & m)) 1494 continue; 1495 1496 /* XXX: src == dst case ? */ 1497 1498 switch (k) { 1499 case 3: 1500 ureg_DP3(ureg, ureg_writemask(dst, m), src[0], src[1]); 1501 break; 1502 case 4: 1503 ureg_DP4(ureg, ureg_writemask(dst, m), src[0], src[1]); 1504 break; 1505 default: 1506 DBG("invalid operation: M%ux%u\n", m, n); 1507 break; 1508 } 1509 } 1510 1511 return D3D_OK; 1512} 1513 1514#define VNOTSUPPORTED 0, 0 1515#define V(maj, min) (((maj) << 8) | (min)) 1516 1517static inline const char * 1518d3dsio_to_string( unsigned opcode ) 1519{ 1520 static const char *names[] = { 1521 "NOP", 1522 "MOV", 1523 "ADD", 1524 "SUB", 1525 "MAD", 1526 "MUL", 1527 "RCP", 1528 "RSQ", 1529 "DP3", 1530 "DP4", 1531 "MIN", 1532 "MAX", 1533 "SLT", 1534 "SGE", 1535 "EXP", 1536 "LOG", 1537 "LIT", 1538 "DST", 1539 "LRP", 1540 "FRC", 1541 "M4x4", 1542 "M4x3", 1543 "M3x4", 1544 "M3x3", 1545 "M3x2", 1546 "CALL", 1547 "CALLNZ", 1548 "LOOP", 1549 "RET", 1550 "ENDLOOP", 1551 "LABEL", 1552 "DCL", 1553 "POW", 1554 "CRS", 1555 "SGN", 1556 "ABS", 1557 "NRM", 1558 "SINCOS", 1559 "REP", 1560 "ENDREP", 1561 "IF", 1562 "IFC", 1563 "ELSE", 1564 "ENDIF", 1565 "BREAK", 1566 "BREAKC", 1567 "MOVA", 1568 "DEFB", 1569 "DEFI", 1570 NULL, 1571 NULL, 1572 NULL, 1573 NULL, 1574 NULL, 1575 NULL, 1576 NULL, 1577 NULL, 1578 NULL, 1579 NULL, 1580 NULL, 1581 NULL, 1582 NULL, 1583 NULL, 1584 NULL, 1585 "TEXCOORD", 1586 "TEXKILL", 1587 "TEX", 1588 "TEXBEM", 1589 "TEXBEML", 1590 "TEXREG2AR", 1591 "TEXREG2GB", 1592 "TEXM3x2PAD", 1593 "TEXM3x2TEX", 1594 "TEXM3x3PAD", 1595 "TEXM3x3TEX", 1596 NULL, 1597 "TEXM3x3SPEC", 1598 "TEXM3x3VSPEC", 1599 "EXPP", 1600 "LOGP", 1601 "CND", 1602 "DEF", 1603 "TEXREG2RGB", 1604 "TEXDP3TEX", 1605 "TEXM3x2DEPTH", 1606 "TEXDP3", 1607 "TEXM3x3", 1608 "TEXDEPTH", 1609 "CMP", 1610 "BEM", 1611 "DP2ADD", 1612 "DSX", 1613 "DSY", 1614 "TEXLDD", 1615 "SETP", 1616 "TEXLDL", 1617 "BREAKP" 1618 }; 1619 1620 if (opcode < ARRAY_SIZE(names)) return names[opcode]; 1621 1622 switch (opcode) { 1623 case D3DSIO_PHASE: return "PHASE"; 1624 case D3DSIO_COMMENT: return "COMMENT"; 1625 case D3DSIO_END: return "END"; 1626 default: 1627 return NULL; 1628 } 1629} 1630 1631#define NULL_INSTRUCTION { 0, { 0, 0 }, { 0, 0 }, 0, 0, NULL } 1632#define IS_VALID_INSTRUCTION(inst) ((inst).vert_version.min | \ 1633 (inst).vert_version.max | \ 1634 (inst).frag_version.min | \ 1635 (inst).frag_version.max) 1636 1637#define SPECIAL(name) \ 1638 NineTranslateInstruction_##name 1639 1640#define DECL_SPECIAL(name) \ 1641 static HRESULT \ 1642 NineTranslateInstruction_##name( struct shader_translator *tx ) 1643 1644static HRESULT 1645NineTranslateInstruction_Generic(struct shader_translator *); 1646 1647DECL_SPECIAL(NOP) 1648{ 1649 /* Nothing to do. NOP was used to avoid hangs 1650 * with very old d3d drivers. */ 1651 return D3D_OK; 1652} 1653 1654DECL_SPECIAL(SUB) 1655{ 1656 struct ureg_program *ureg = tx->ureg; 1657 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1658 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 1659 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 1660 1661 ureg_ADD(ureg, dst, src0, ureg_negate(src1)); 1662 return D3D_OK; 1663} 1664 1665DECL_SPECIAL(ABS) 1666{ 1667 struct ureg_program *ureg = tx->ureg; 1668 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1669 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 1670 1671 ureg_MOV(ureg, dst, ureg_abs(src)); 1672 return D3D_OK; 1673} 1674 1675DECL_SPECIAL(XPD) 1676{ 1677 struct ureg_program *ureg = tx->ureg; 1678 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1679 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 1680 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 1681 1682 ureg_MUL(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), 1683 ureg_swizzle(src0, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, 1684 TGSI_SWIZZLE_X, 0), 1685 ureg_swizzle(src1, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 1686 TGSI_SWIZZLE_Y, 0)); 1687 ureg_MAD(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYZ), 1688 ureg_swizzle(src0, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 1689 TGSI_SWIZZLE_Y, 0), 1690 ureg_negate(ureg_swizzle(src1, TGSI_SWIZZLE_Y, 1691 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)), 1692 ureg_src(dst)); 1693 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), 1694 ureg_imm1f(ureg, 1)); 1695 return D3D_OK; 1696} 1697 1698DECL_SPECIAL(M4x4) 1699{ 1700 return NineTranslateInstruction_Mkxn(tx, 4, 4); 1701} 1702 1703DECL_SPECIAL(M4x3) 1704{ 1705 return NineTranslateInstruction_Mkxn(tx, 4, 3); 1706} 1707 1708DECL_SPECIAL(M3x4) 1709{ 1710 return NineTranslateInstruction_Mkxn(tx, 3, 4); 1711} 1712 1713DECL_SPECIAL(M3x3) 1714{ 1715 return NineTranslateInstruction_Mkxn(tx, 3, 3); 1716} 1717 1718DECL_SPECIAL(M3x2) 1719{ 1720 return NineTranslateInstruction_Mkxn(tx, 3, 2); 1721} 1722 1723DECL_SPECIAL(CMP) 1724{ 1725 ureg_CMP(tx->ureg, tx_dst_param(tx, &tx->insn.dst[0]), 1726 tx_src_param(tx, &tx->insn.src[0]), 1727 tx_src_param(tx, &tx->insn.src[2]), 1728 tx_src_param(tx, &tx->insn.src[1])); 1729 return D3D_OK; 1730} 1731 1732DECL_SPECIAL(CND) 1733{ 1734 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1735 struct ureg_dst cgt; 1736 struct ureg_src cnd; 1737 1738 /* the coissue flag was a tip for compilers to advise to 1739 * execute two operations at the same time, in cases 1740 * the two executions had same dst with different channels. 1741 * It has no effect on current hw. However it seems CND 1742 * is affected. The handling of this very specific case 1743 * handled below mimick wine behaviour */ 1744 if (tx->insn.coissue && tx->version.major == 1 && tx->version.minor < 4 && tx->insn.dst[0].mask != NINED3DSP_WRITEMASK_3) { 1745 ureg_MOV(tx->ureg, 1746 dst, tx_src_param(tx, &tx->insn.src[1])); 1747 return D3D_OK; 1748 } 1749 1750 cnd = tx_src_param(tx, &tx->insn.src[0]); 1751 cgt = tx_scratch(tx); 1752 1753 if (tx->version.major == 1 && tx->version.minor < 4) 1754 cnd = ureg_scalar(cnd, TGSI_SWIZZLE_W); 1755 1756 ureg_SGT(tx->ureg, cgt, cnd, ureg_imm1f(tx->ureg, 0.5f)); 1757 1758 ureg_CMP(tx->ureg, dst, ureg_negate(ureg_src(cgt)), 1759 tx_src_param(tx, &tx->insn.src[1]), 1760 tx_src_param(tx, &tx->insn.src[2])); 1761 return D3D_OK; 1762} 1763 1764DECL_SPECIAL(CALL) 1765{ 1766 assert(tx->insn.src[0].idx < tx->num_inst_labels); 1767 ureg_CAL(tx->ureg, &tx->inst_labels[tx->insn.src[0].idx]); 1768 return D3D_OK; 1769} 1770 1771DECL_SPECIAL(CALLNZ) 1772{ 1773 struct ureg_program *ureg = tx->ureg; 1774 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); 1775 1776 if (!tx->native_integers) 1777 ureg_IF(ureg, src, tx_cond(tx)); 1778 else 1779 ureg_UIF(ureg, src, tx_cond(tx)); 1780 ureg_CAL(ureg, &tx->inst_labels[tx->insn.src[0].idx]); 1781 tx_endcond(tx); 1782 ureg_ENDIF(ureg); 1783 return D3D_OK; 1784} 1785 1786DECL_SPECIAL(LOOP) 1787{ 1788 struct ureg_program *ureg = tx->ureg; 1789 unsigned *label; 1790 struct ureg_src src = tx_src_param(tx, &tx->insn.src[1]); 1791 struct ureg_dst ctr; 1792 struct ureg_dst tmp; 1793 struct ureg_src ctrx; 1794 1795 label = tx_bgnloop(tx); 1796 ctr = tx_get_loopctr(tx, TRUE); 1797 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); 1798 1799 /* src: num_iterations - start_value of al - step for al - 0 */ 1800 ureg_MOV(ureg, ctr, src); 1801 ureg_BGNLOOP(tx->ureg, label); 1802 tmp = tx_scratch_scalar(tx); 1803 /* Initially ctr.x contains the number of iterations. 1804 * ctr.y will contain the updated value of al. 1805 * We decrease ctr.x at the end of every iteration, 1806 * and stop when it reaches 0. */ 1807 1808 if (!tx->native_integers) { 1809 /* case src and ctr contain floats */ 1810 /* to avoid precision issue, we stop when ctr <= 0.5 */ 1811 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); 1812 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1813 } else { 1814 /* case src and ctr contain integers */ 1815 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); 1816 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1817 } 1818 ureg_BRK(ureg); 1819 tx_endcond(tx); 1820 ureg_ENDIF(ureg); 1821 return D3D_OK; 1822} 1823 1824DECL_SPECIAL(RET) 1825{ 1826 /* RET as a last instruction could be safely ignored. 1827 * Remove it to prevent crashes/warnings in case underlying 1828 * driver doesn't implement arbitrary returns. 1829 */ 1830 if (*(tx->parse_next) != NINED3DSP_END) { 1831 ureg_RET(tx->ureg); 1832 } 1833 return D3D_OK; 1834} 1835 1836DECL_SPECIAL(ENDLOOP) 1837{ 1838 struct ureg_program *ureg = tx->ureg; 1839 struct ureg_dst ctr = tx_get_loopctr(tx, TRUE); 1840 struct ureg_dst dst_ctrx, dst_al; 1841 struct ureg_src src_ctr, al_counter; 1842 1843 dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); 1844 dst_al = ureg_writemask(ctr, NINED3DSP_WRITEMASK_1); 1845 src_ctr = ureg_src(ctr); 1846 al_counter = ureg_scalar(src_ctr, TGSI_SWIZZLE_Z); 1847 1848 /* ctr.x -= 1 1849 * ctr.y (aL) += step */ 1850 if (!tx->native_integers) { 1851 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); 1852 ureg_ADD(ureg, dst_al, src_ctr, al_counter); 1853 } else { 1854 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); 1855 ureg_UADD(ureg, dst_al, src_ctr, al_counter); 1856 } 1857 ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); 1858 return D3D_OK; 1859} 1860 1861DECL_SPECIAL(LABEL) 1862{ 1863 unsigned k = tx->num_inst_labels; 1864 unsigned n = tx->insn.src[0].idx; 1865 assert(n < 2048); 1866 if (n >= k) 1867 tx->inst_labels = REALLOC(tx->inst_labels, 1868 k * sizeof(tx->inst_labels[0]), 1869 n * sizeof(tx->inst_labels[0])); 1870 1871 tx->inst_labels[n] = ureg_get_instruction_number(tx->ureg); 1872 return D3D_OK; 1873} 1874 1875DECL_SPECIAL(SINCOS) 1876{ 1877 struct ureg_program *ureg = tx->ureg; 1878 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 1879 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 1880 struct ureg_dst tmp = tx_scratch_scalar(tx); 1881 1882 assert(!(dst.WriteMask & 0xc)); 1883 1884 /* Copying to a temporary register avoids src/dst aliasing. 1885 * src is supposed to have replicated swizzle. */ 1886 ureg_MOV(ureg, tmp, src); 1887 1888 /* z undefined, w untouched */ 1889 ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), 1890 tx_src_scalar(tmp)); 1891 ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), 1892 tx_src_scalar(tmp)); 1893 return D3D_OK; 1894} 1895 1896DECL_SPECIAL(SGN) 1897{ 1898 ureg_SSG(tx->ureg, 1899 tx_dst_param(tx, &tx->insn.dst[0]), 1900 tx_src_param(tx, &tx->insn.src[0])); 1901 return D3D_OK; 1902} 1903 1904DECL_SPECIAL(REP) 1905{ 1906 struct ureg_program *ureg = tx->ureg; 1907 unsigned *label; 1908 struct ureg_src rep = tx_src_param(tx, &tx->insn.src[0]); 1909 struct ureg_dst ctr; 1910 struct ureg_dst tmp; 1911 struct ureg_src ctrx; 1912 1913 label = tx_bgnloop(tx); 1914 ctr = ureg_writemask(tx_get_loopctr(tx, FALSE), NINED3DSP_WRITEMASK_0); 1915 ctrx = ureg_scalar(ureg_src(ctr), TGSI_SWIZZLE_X); 1916 1917 /* NOTE: rep must be constant, so we don't have to save the count */ 1918 assert(rep.File == TGSI_FILE_CONSTANT || rep.File == TGSI_FILE_IMMEDIATE); 1919 1920 /* rep: num_iterations - 0 - 0 - 0 */ 1921 ureg_MOV(ureg, ctr, rep); 1922 ureg_BGNLOOP(ureg, label); 1923 tmp = tx_scratch_scalar(tx); 1924 /* Initially ctr.x contains the number of iterations. 1925 * We decrease ctr.x at the end of every iteration, 1926 * and stop when it reaches 0. */ 1927 1928 if (!tx->native_integers) { 1929 /* case src and ctr contain floats */ 1930 /* to avoid precision issue, we stop when ctr <= 0.5 */ 1931 ureg_SGE(ureg, tmp, ureg_imm1f(ureg, 0.5f), ctrx); 1932 ureg_IF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1933 } else { 1934 /* case src and ctr contain integers */ 1935 ureg_ISGE(ureg, tmp, ureg_imm1i(ureg, 0), ctrx); 1936 ureg_UIF(ureg, tx_src_scalar(tmp), tx_cond(tx)); 1937 } 1938 ureg_BRK(ureg); 1939 tx_endcond(tx); 1940 ureg_ENDIF(ureg); 1941 1942 return D3D_OK; 1943} 1944 1945DECL_SPECIAL(ENDREP) 1946{ 1947 struct ureg_program *ureg = tx->ureg; 1948 struct ureg_dst ctr = tx_get_loopctr(tx, FALSE); 1949 struct ureg_dst dst_ctrx = ureg_writemask(ctr, NINED3DSP_WRITEMASK_0); 1950 struct ureg_src src_ctr = ureg_src(ctr); 1951 1952 /* ctr.x -= 1 */ 1953 if (!tx->native_integers) 1954 ureg_ADD(ureg, dst_ctrx, src_ctr, ureg_imm1f(ureg, -1.0f)); 1955 else 1956 ureg_UADD(ureg, dst_ctrx, src_ctr, ureg_imm1i(ureg, -1)); 1957 1958 ureg_ENDLOOP(tx->ureg, tx_endloop(tx)); 1959 return D3D_OK; 1960} 1961 1962DECL_SPECIAL(ENDIF) 1963{ 1964 tx_endcond(tx); 1965 ureg_ENDIF(tx->ureg); 1966 return D3D_OK; 1967} 1968 1969DECL_SPECIAL(IF) 1970{ 1971 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 1972 1973 if (tx->native_integers && tx->insn.src[0].file == D3DSPR_CONSTBOOL) 1974 ureg_UIF(tx->ureg, src, tx_cond(tx)); 1975 else 1976 ureg_IF(tx->ureg, src, tx_cond(tx)); 1977 1978 return D3D_OK; 1979} 1980 1981static inline unsigned 1982sm1_insn_flags_to_tgsi_setop(BYTE flags) 1983{ 1984 switch (flags) { 1985 case NINED3DSHADER_REL_OP_GT: return TGSI_OPCODE_SGT; 1986 case NINED3DSHADER_REL_OP_EQ: return TGSI_OPCODE_SEQ; 1987 case NINED3DSHADER_REL_OP_GE: return TGSI_OPCODE_SGE; 1988 case NINED3DSHADER_REL_OP_LT: return TGSI_OPCODE_SLT; 1989 case NINED3DSHADER_REL_OP_NE: return TGSI_OPCODE_SNE; 1990 case NINED3DSHADER_REL_OP_LE: return TGSI_OPCODE_SLE; 1991 default: 1992 assert(!"invalid comparison flags"); 1993 return TGSI_OPCODE_SGT; 1994 } 1995} 1996 1997DECL_SPECIAL(IFC) 1998{ 1999 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 2000 struct ureg_src src[2]; 2001 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 2002 src[0] = tx_src_param(tx, &tx->insn.src[0]); 2003 src[1] = tx_src_param(tx, &tx->insn.src[1]); 2004 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0); 2005 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); 2006 return D3D_OK; 2007} 2008 2009DECL_SPECIAL(ELSE) 2010{ 2011 ureg_ELSE(tx->ureg, tx_elsecond(tx)); 2012 return D3D_OK; 2013} 2014 2015DECL_SPECIAL(BREAKC) 2016{ 2017 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 2018 struct ureg_src src[2]; 2019 struct ureg_dst tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_X); 2020 src[0] = tx_src_param(tx, &tx->insn.src[0]); 2021 src[1] = tx_src_param(tx, &tx->insn.src[1]); 2022 ureg_insn(tx->ureg, cmp_op, &tmp, 1, src, 2, 0); 2023 ureg_IF(tx->ureg, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), tx_cond(tx)); 2024 ureg_BRK(tx->ureg); 2025 tx_endcond(tx); 2026 ureg_ENDIF(tx->ureg); 2027 return D3D_OK; 2028} 2029 2030static const char *sm1_declusage_names[] = 2031{ 2032 [D3DDECLUSAGE_POSITION] = "POSITION", 2033 [D3DDECLUSAGE_BLENDWEIGHT] = "BLENDWEIGHT", 2034 [D3DDECLUSAGE_BLENDINDICES] = "BLENDINDICES", 2035 [D3DDECLUSAGE_NORMAL] = "NORMAL", 2036 [D3DDECLUSAGE_PSIZE] = "PSIZE", 2037 [D3DDECLUSAGE_TEXCOORD] = "TEXCOORD", 2038 [D3DDECLUSAGE_TANGENT] = "TANGENT", 2039 [D3DDECLUSAGE_BINORMAL] = "BINORMAL", 2040 [D3DDECLUSAGE_TESSFACTOR] = "TESSFACTOR", 2041 [D3DDECLUSAGE_POSITIONT] = "POSITIONT", 2042 [D3DDECLUSAGE_COLOR] = "COLOR", 2043 [D3DDECLUSAGE_FOG] = "FOG", 2044 [D3DDECLUSAGE_DEPTH] = "DEPTH", 2045 [D3DDECLUSAGE_SAMPLE] = "SAMPLE" 2046}; 2047 2048static inline unsigned 2049sm1_to_nine_declusage(struct sm1_semantic *dcl) 2050{ 2051 return nine_d3d9_to_nine_declusage(dcl->usage, dcl->usage_idx); 2052} 2053 2054static void 2055sm1_declusage_to_tgsi(struct tgsi_declaration_semantic *sem, 2056 boolean tc, 2057 struct sm1_semantic *dcl) 2058{ 2059 BYTE index = dcl->usage_idx; 2060 2061 /* For everything that is not matching to a TGSI_SEMANTIC_****, 2062 * we match to a TGSI_SEMANTIC_GENERIC with index. 2063 * 2064 * The index can be anything UINT16 and usage_idx is BYTE, 2065 * so we can fit everything. It doesn't matter if indices 2066 * are close together or low. 2067 * 2068 * 2069 * POSITION >= 1: 10 * index + 7 2070 * COLOR >= 2: 10 * (index-1) + 8 2071 * FOG: 16 2072 * TEXCOORD[0..15]: index 2073 * BLENDWEIGHT: 10 * index + 19 2074 * BLENDINDICES: 10 * index + 20 2075 * NORMAL: 10 * index + 21 2076 * TANGENT: 10 * index + 22 2077 * BINORMAL: 10 * index + 23 2078 * TESSFACTOR: 10 * index + 24 2079 */ 2080 2081 switch (dcl->usage) { 2082 case D3DDECLUSAGE_POSITION: 2083 case D3DDECLUSAGE_POSITIONT: 2084 case D3DDECLUSAGE_DEPTH: 2085 if (index == 0) { 2086 sem->Name = TGSI_SEMANTIC_POSITION; 2087 sem->Index = 0; 2088 } else { 2089 sem->Name = TGSI_SEMANTIC_GENERIC; 2090 sem->Index = 10 * index + 7; 2091 } 2092 break; 2093 case D3DDECLUSAGE_COLOR: 2094 if (index < 2) { 2095 sem->Name = TGSI_SEMANTIC_COLOR; 2096 sem->Index = index; 2097 } else { 2098 sem->Name = TGSI_SEMANTIC_GENERIC; 2099 sem->Index = 10 * (index-1) + 8; 2100 } 2101 break; 2102 case D3DDECLUSAGE_FOG: 2103 assert(index == 0); 2104 sem->Name = TGSI_SEMANTIC_GENERIC; 2105 sem->Index = 16; 2106 break; 2107 case D3DDECLUSAGE_PSIZE: 2108 assert(index == 0); 2109 sem->Name = TGSI_SEMANTIC_PSIZE; 2110 sem->Index = 0; 2111 break; 2112 case D3DDECLUSAGE_TEXCOORD: 2113 assert(index < 16); 2114 if (index < 8 && tc) 2115 sem->Name = TGSI_SEMANTIC_TEXCOORD; 2116 else 2117 sem->Name = TGSI_SEMANTIC_GENERIC; 2118 sem->Index = index; 2119 break; 2120 case D3DDECLUSAGE_BLENDWEIGHT: 2121 sem->Name = TGSI_SEMANTIC_GENERIC; 2122 sem->Index = 10 * index + 19; 2123 break; 2124 case D3DDECLUSAGE_BLENDINDICES: 2125 sem->Name = TGSI_SEMANTIC_GENERIC; 2126 sem->Index = 10 * index + 20; 2127 break; 2128 case D3DDECLUSAGE_NORMAL: 2129 sem->Name = TGSI_SEMANTIC_GENERIC; 2130 sem->Index = 10 * index + 21; 2131 break; 2132 case D3DDECLUSAGE_TANGENT: 2133 sem->Name = TGSI_SEMANTIC_GENERIC; 2134 sem->Index = 10 * index + 22; 2135 break; 2136 case D3DDECLUSAGE_BINORMAL: 2137 sem->Name = TGSI_SEMANTIC_GENERIC; 2138 sem->Index = 10 * index + 23; 2139 break; 2140 case D3DDECLUSAGE_TESSFACTOR: 2141 sem->Name = TGSI_SEMANTIC_GENERIC; 2142 sem->Index = 10 * index + 24; 2143 break; 2144 case D3DDECLUSAGE_SAMPLE: 2145 sem->Name = TGSI_SEMANTIC_COUNT; 2146 sem->Index = 0; 2147 break; 2148 default: 2149 unreachable("Invalid DECLUSAGE."); 2150 break; 2151 } 2152} 2153 2154#define NINED3DSTT_1D (D3DSTT_1D >> D3DSP_TEXTURETYPE_SHIFT) 2155#define NINED3DSTT_2D (D3DSTT_2D >> D3DSP_TEXTURETYPE_SHIFT) 2156#define NINED3DSTT_VOLUME (D3DSTT_VOLUME >> D3DSP_TEXTURETYPE_SHIFT) 2157#define NINED3DSTT_CUBE (D3DSTT_CUBE >> D3DSP_TEXTURETYPE_SHIFT) 2158static inline unsigned 2159d3dstt_to_tgsi_tex(BYTE sampler_type) 2160{ 2161 switch (sampler_type) { 2162 case NINED3DSTT_1D: return TGSI_TEXTURE_1D; 2163 case NINED3DSTT_2D: return TGSI_TEXTURE_2D; 2164 case NINED3DSTT_VOLUME: return TGSI_TEXTURE_3D; 2165 case NINED3DSTT_CUBE: return TGSI_TEXTURE_CUBE; 2166 default: 2167 assert(0); 2168 return TGSI_TEXTURE_UNKNOWN; 2169 } 2170} 2171static inline unsigned 2172d3dstt_to_tgsi_tex_shadow(BYTE sampler_type) 2173{ 2174 switch (sampler_type) { 2175 case NINED3DSTT_1D: return TGSI_TEXTURE_SHADOW1D; 2176 case NINED3DSTT_2D: return TGSI_TEXTURE_SHADOW2D; 2177 case NINED3DSTT_VOLUME: 2178 case NINED3DSTT_CUBE: 2179 default: 2180 assert(0); 2181 return TGSI_TEXTURE_UNKNOWN; 2182 } 2183} 2184static inline unsigned 2185ps1x_sampler_type(const struct nine_shader_info *info, unsigned stage) 2186{ 2187 boolean shadow = !!(info->sampler_mask_shadow & (1 << stage)); 2188 switch ((info->sampler_ps1xtypes >> (stage * 2)) & 0x3) { 2189 case 1: return shadow ? TGSI_TEXTURE_SHADOW1D : TGSI_TEXTURE_1D; 2190 case 0: return shadow ? TGSI_TEXTURE_SHADOW2D : TGSI_TEXTURE_2D; 2191 case 3: return TGSI_TEXTURE_3D; 2192 default: 2193 return TGSI_TEXTURE_CUBE; 2194 } 2195} 2196 2197static const char * 2198sm1_sampler_type_name(BYTE sampler_type) 2199{ 2200 switch (sampler_type) { 2201 case NINED3DSTT_1D: return "1D"; 2202 case NINED3DSTT_2D: return "2D"; 2203 case NINED3DSTT_VOLUME: return "VOLUME"; 2204 case NINED3DSTT_CUBE: return "CUBE"; 2205 default: 2206 return "(D3DSTT_?)"; 2207 } 2208} 2209 2210static inline unsigned 2211nine_tgsi_to_interp_mode(struct tgsi_declaration_semantic *sem) 2212{ 2213 switch (sem->Name) { 2214 case TGSI_SEMANTIC_POSITION: 2215 case TGSI_SEMANTIC_NORMAL: 2216 return TGSI_INTERPOLATE_LINEAR; 2217 case TGSI_SEMANTIC_BCOLOR: 2218 case TGSI_SEMANTIC_COLOR: 2219 return TGSI_INTERPOLATE_COLOR; 2220 case TGSI_SEMANTIC_FOG: 2221 case TGSI_SEMANTIC_GENERIC: 2222 case TGSI_SEMANTIC_TEXCOORD: 2223 case TGSI_SEMANTIC_CLIPDIST: 2224 case TGSI_SEMANTIC_CLIPVERTEX: 2225 return TGSI_INTERPOLATE_PERSPECTIVE; 2226 case TGSI_SEMANTIC_EDGEFLAG: 2227 case TGSI_SEMANTIC_FACE: 2228 case TGSI_SEMANTIC_INSTANCEID: 2229 case TGSI_SEMANTIC_PCOORD: 2230 case TGSI_SEMANTIC_PRIMID: 2231 case TGSI_SEMANTIC_PSIZE: 2232 case TGSI_SEMANTIC_VERTEXID: 2233 return TGSI_INTERPOLATE_CONSTANT; 2234 default: 2235 assert(0); 2236 return TGSI_INTERPOLATE_CONSTANT; 2237 } 2238} 2239 2240DECL_SPECIAL(DCL) 2241{ 2242 struct ureg_program *ureg = tx->ureg; 2243 boolean is_input; 2244 boolean is_sampler; 2245 struct tgsi_declaration_semantic tgsi; 2246 struct sm1_semantic sem; 2247 sm1_read_semantic(tx, &sem); 2248 2249 is_input = sem.reg.file == D3DSPR_INPUT; 2250 is_sampler = 2251 sem.usage == D3DDECLUSAGE_SAMPLE || sem.reg.file == D3DSPR_SAMPLER; 2252 2253 DUMP("DCL "); 2254 sm1_dump_dst_param(&sem.reg); 2255 if (is_sampler) 2256 DUMP(" %s\n", sm1_sampler_type_name(sem.sampler_type)); 2257 else 2258 if (tx->version.major >= 3) 2259 DUMP(" %s%i\n", sm1_declusage_names[sem.usage], sem.usage_idx); 2260 else 2261 if (sem.usage | sem.usage_idx) 2262 DUMP(" %u[%u]\n", sem.usage, sem.usage_idx); 2263 else 2264 DUMP("\n"); 2265 2266 if (is_sampler) { 2267 const unsigned m = 1 << sem.reg.idx; 2268 ureg_DECL_sampler(ureg, sem.reg.idx); 2269 tx->info->sampler_mask |= m; 2270 tx->sampler_targets[sem.reg.idx] = (tx->info->sampler_mask_shadow & m) ? 2271 d3dstt_to_tgsi_tex_shadow(sem.sampler_type) : 2272 d3dstt_to_tgsi_tex(sem.sampler_type); 2273 return D3D_OK; 2274 } 2275 2276 sm1_declusage_to_tgsi(&tgsi, tx->want_texcoord, &sem); 2277 if (IS_VS) { 2278 if (is_input) { 2279 /* linkage outside of shader with vertex declaration */ 2280 ureg_DECL_vs_input(ureg, sem.reg.idx); 2281 assert(sem.reg.idx < ARRAY_SIZE(tx->info->input_map)); 2282 tx->info->input_map[sem.reg.idx] = sm1_to_nine_declusage(&sem); 2283 tx->info->num_inputs = MAX2(tx->info->num_inputs, sem.reg.idx + 1); 2284 /* NOTE: preserving order in case of indirect access */ 2285 } else 2286 if (tx->version.major >= 3) { 2287 /* SM2 output semantic determined by file */ 2288 assert(sem.reg.mask != 0); 2289 if (sem.usage == D3DDECLUSAGE_POSITIONT) 2290 tx->info->position_t = TRUE; 2291 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.o)); 2292 assert(ureg_dst_is_undef(tx->regs.o[sem.reg.idx]) && "Nine doesn't support yet packing"); 2293 tx->regs.o[sem.reg.idx] = ureg_DECL_output_masked( 2294 ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 0, 1); 2295 nine_record_outputs(tx, sem.usage, sem.usage_idx, sem.reg.mask, sem.reg.idx); 2296 if (tx->info->process_vertices && sem.usage == D3DDECLUSAGE_POSITION && sem.usage_idx == 0) { 2297 tx->regs.oPos_out = tx->regs.o[sem.reg.idx]; 2298 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); 2299 tx->regs.oPos = tx->regs.o[sem.reg.idx]; 2300 } 2301 2302 if (tgsi.Name == TGSI_SEMANTIC_PSIZE) { 2303 tx->regs.o[sem.reg.idx] = ureg_DECL_temporary(ureg); 2304 tx->regs.oPts = tx->regs.o[sem.reg.idx]; 2305 } 2306 } 2307 } else { 2308 if (is_input && tx->version.major >= 3) { 2309 unsigned interp_location = 0; 2310 /* SM3 only, SM2 input semantic determined by file */ 2311 assert(sem.reg.idx < ARRAY_SIZE(tx->regs.v)); 2312 assert(ureg_src_is_undef(tx->regs.v[sem.reg.idx]) && "Nine doesn't support yet packing"); 2313 /* PositionT and tessfactor forbidden */ 2314 if (sem.usage == D3DDECLUSAGE_POSITIONT || sem.usage == D3DDECLUSAGE_TESSFACTOR) 2315 return D3DERR_INVALIDCALL; 2316 2317 if (tgsi.Name == TGSI_SEMANTIC_POSITION) { 2318 /* Position0 is forbidden (likely because vPos already does that) */ 2319 if (sem.usage == D3DDECLUSAGE_POSITION) 2320 return D3DERR_INVALIDCALL; 2321 /* Following code is for depth */ 2322 tx->regs.v[sem.reg.idx] = nine_get_position_input(tx); 2323 return D3D_OK; 2324 } 2325 2326 if (sem.reg.mod & NINED3DSPDM_CENTROID || 2327 (tgsi.Name == TGSI_SEMANTIC_COLOR && tx->info->force_color_in_centroid)) 2328 interp_location = TGSI_INTERPOLATE_LOC_CENTROID; 2329 2330 tx->regs.v[sem.reg.idx] = ureg_DECL_fs_input_centroid( 2331 ureg, tgsi.Name, tgsi.Index, 2332 nine_tgsi_to_interp_mode(&tgsi), 2333 interp_location, 0, 1); 2334 } else 2335 if (!is_input && 0) { /* declare in COLOROUT/DEPTHOUT case */ 2336 /* FragColor or FragDepth */ 2337 assert(sem.reg.mask != 0); 2338 ureg_DECL_output_masked(ureg, tgsi.Name, tgsi.Index, sem.reg.mask, 2339 0, 1); 2340 } 2341 } 2342 return D3D_OK; 2343} 2344 2345DECL_SPECIAL(DEF) 2346{ 2347 tx_set_lconstf(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.f); 2348 return D3D_OK; 2349} 2350 2351DECL_SPECIAL(DEFB) 2352{ 2353 tx_set_lconstb(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.b); 2354 return D3D_OK; 2355} 2356 2357DECL_SPECIAL(DEFI) 2358{ 2359 tx_set_lconsti(tx, tx->insn.dst[0].idx, tx->insn.src[0].imm.i); 2360 return D3D_OK; 2361} 2362 2363DECL_SPECIAL(POW) 2364{ 2365 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2366 struct ureg_src src[2] = { 2367 tx_src_param(tx, &tx->insn.src[0]), 2368 tx_src_param(tx, &tx->insn.src[1]) 2369 }; 2370 ureg_POW(tx->ureg, dst, ureg_abs(src[0]), src[1]); 2371 return D3D_OK; 2372} 2373 2374/* Tests results on Win 10: 2375 * NV (NVIDIA GeForce GT 635M) 2376 * AMD (AMD Radeon HD 7730M) 2377 * INTEL (Intel(R) HD Graphics 4000) 2378 * PS2 and PS3: 2379 * RCP and RSQ can generate inf on NV and AMD. 2380 * RCP and RSQ are clamped on INTEL (+- FLT_MAX), 2381 * NV: log not clamped 2382 * AMD: log(0) is -FLT_MAX (but log(inf) is inf) 2383 * INTEL: log(0) is -FLT_MAX and log(inf) is 127 2384 * All devices have 0*anything = 0 2385 * 2386 * INTEL VS2 and VS3: same behaviour. 2387 * Some differences VS2 and VS3 for constants defined with inf/NaN. 2388 * While PS3, VS3 and PS2 keep NaN and Inf shader constants without change, 2389 * VS2 seems to clamp to zero (may be test failure). 2390 * AMD VS2: unknown, VS3: very likely behaviour of PS3 2391 * NV VS2 and VS3: very likely behaviour of PS3 2392 * For both, Inf in VS becomes NaN is PS 2393 * "Very likely" because the test was less extensive. 2394 * 2395 * Thus all clamping can be removed for shaders 2 and 3, 2396 * as long as 0*anything = 0. 2397 * Else clamps to enforce 0*anything = 0 (anything being then 2398 * neither inf or NaN, the user being unlikely to pass them 2399 * as constant). 2400 * The status for VS1 and PS1 is unknown. 2401 */ 2402 2403DECL_SPECIAL(RCP) 2404{ 2405 struct ureg_program *ureg = tx->ureg; 2406 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2407 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2408 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx); 2409 ureg_RCP(ureg, tmp, src); 2410 if (!tx->mul_zero_wins) { 2411 /* FLT_MAX has issues with Rayman */ 2412 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX/2.f), ureg_src(tmp)); 2413 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX/2.f), ureg_src(tmp)); 2414 } 2415 return D3D_OK; 2416} 2417 2418DECL_SPECIAL(RSQ) 2419{ 2420 struct ureg_program *ureg = tx->ureg; 2421 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2422 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2423 struct ureg_dst tmp = tx->mul_zero_wins ? dst : tx_scratch(tx); 2424 ureg_RSQ(ureg, tmp, ureg_abs(src)); 2425 if (!tx->mul_zero_wins) 2426 ureg_MIN(ureg, dst, ureg_imm1f(ureg, FLT_MAX), ureg_src(tmp)); 2427 return D3D_OK; 2428} 2429 2430DECL_SPECIAL(LOG) 2431{ 2432 struct ureg_program *ureg = tx->ureg; 2433 struct ureg_dst tmp = tx_scratch_scalar(tx); 2434 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2435 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2436 ureg_LG2(ureg, tmp, ureg_abs(src)); 2437 if (tx->mul_zero_wins) { 2438 ureg_MOV(ureg, dst, tx_src_scalar(tmp)); 2439 } else { 2440 ureg_MAX(ureg, dst, ureg_imm1f(ureg, -FLT_MAX), tx_src_scalar(tmp)); 2441 } 2442 return D3D_OK; 2443} 2444 2445DECL_SPECIAL(LIT) 2446{ 2447 struct ureg_program *ureg = tx->ureg; 2448 struct ureg_dst tmp = tx_scratch(tx); 2449 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2450 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2451 ureg_LIT(ureg, tmp, src); 2452 /* d3d9 LIT is the same than gallium LIT. One difference is that d3d9 2453 * states that dst.z is 0 when src.y <= 0. Gallium definition can assign 2454 * it 0^0 if src.w=0, which value is driver dependent. */ 2455 ureg_CMP(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), 2456 ureg_negate(ureg_scalar(src, TGSI_SWIZZLE_Y)), 2457 ureg_src(tmp), ureg_imm1f(ureg, 0.0f)); 2458 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XYW), ureg_src(tmp)); 2459 return D3D_OK; 2460} 2461 2462DECL_SPECIAL(NRM) 2463{ 2464 struct ureg_program *ureg = tx->ureg; 2465 struct ureg_dst tmp = tx_scratch_scalar(tx); 2466 struct ureg_src nrm = tx_src_scalar(tmp); 2467 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2468 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2469 ureg_DP3(ureg, tmp, src, src); 2470 ureg_RSQ(ureg, tmp, nrm); 2471 if (!tx->mul_zero_wins) 2472 ureg_MIN(ureg, tmp, ureg_imm1f(ureg, FLT_MAX), nrm); 2473 ureg_MUL(ureg, dst, src, nrm); 2474 return D3D_OK; 2475} 2476 2477DECL_SPECIAL(DP2ADD) 2478{ 2479 struct ureg_dst tmp = tx_scratch_scalar(tx); 2480 struct ureg_src dp2 = tx_src_scalar(tmp); 2481 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2482 struct ureg_src src[3]; 2483 int i; 2484 for (i = 0; i < 3; ++i) 2485 src[i] = tx_src_param(tx, &tx->insn.src[i]); 2486 assert_replicate_swizzle(&src[2]); 2487 2488 ureg_DP2(tx->ureg, tmp, src[0], src[1]); 2489 ureg_ADD(tx->ureg, dst, src[2], dp2); 2490 2491 return D3D_OK; 2492} 2493 2494DECL_SPECIAL(TEXCOORD) 2495{ 2496 struct ureg_program *ureg = tx->ureg; 2497 const unsigned s = tx->insn.dst[0].idx; 2498 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2499 2500 tx_texcoord_alloc(tx, s); 2501 ureg_MOV(ureg, ureg_writemask(ureg_saturate(dst), TGSI_WRITEMASK_XYZ), tx->regs.vT[s]); 2502 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(tx->ureg, 1.0f)); 2503 2504 return D3D_OK; 2505} 2506 2507DECL_SPECIAL(TEXCOORD_ps14) 2508{ 2509 struct ureg_program *ureg = tx->ureg; 2510 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2511 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2512 2513 assert(tx->insn.src[0].file == D3DSPR_TEXTURE); 2514 2515 ureg_MOV(ureg, dst, src); 2516 2517 return D3D_OK; 2518} 2519 2520DECL_SPECIAL(TEXKILL) 2521{ 2522 struct ureg_src reg; 2523 2524 if (tx->version.major > 1 || tx->version.minor > 3) { 2525 reg = tx_dst_param_as_src(tx, &tx->insn.dst[0]); 2526 } else { 2527 tx_texcoord_alloc(tx, tx->insn.dst[0].idx); 2528 reg = tx->regs.vT[tx->insn.dst[0].idx]; 2529 } 2530 if (tx->version.major < 2) 2531 reg = ureg_swizzle(reg, NINE_SWIZZLE4(X,Y,Z,Z)); 2532 ureg_KILL_IF(tx->ureg, reg); 2533 2534 return D3D_OK; 2535} 2536 2537DECL_SPECIAL(TEXBEM) 2538{ 2539 struct ureg_program *ureg = tx->ureg; 2540 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2541 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2542 struct ureg_dst tmp, tmp2, texcoord; 2543 struct ureg_src sample, m00, m01, m10, m11, c8m, c16m2; 2544 struct ureg_src bumpenvlscale, bumpenvloffset; 2545 const int m = tx->insn.dst[0].idx; 2546 2547 assert(tx->version.major == 1); 2548 2549 sample = ureg_DECL_sampler(ureg, m); 2550 tx->info->sampler_mask |= 1 << m; 2551 2552 tx_texcoord_alloc(tx, m); 2553 2554 tmp = tx_scratch(tx); 2555 tmp2 = tx_scratch(tx); 2556 texcoord = tx_scratch(tx); 2557 /* 2558 * Bump-env-matrix: 2559 * 00 is X 2560 * 01 is Y 2561 * 10 is Z 2562 * 11 is W 2563 */ 2564 c8m = nine_float_constant_src(tx, 8+m); 2565 c16m2 = nine_float_constant_src(tx, 8+8+m/2); 2566 2567 m00 = NINE_APPLY_SWIZZLE(c8m, X); 2568 m01 = NINE_APPLY_SWIZZLE(c8m, Y); 2569 m10 = NINE_APPLY_SWIZZLE(c8m, Z); 2570 m11 = NINE_APPLY_SWIZZLE(c8m, W); 2571 2572 /* These two attributes are packed as X=scale0 Y=offset0 Z=scale1 W=offset1 etc */ 2573 if (m % 2 == 0) { 2574 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, X); 2575 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, Y); 2576 } else { 2577 bumpenvlscale = NINE_APPLY_SWIZZLE(c16m2, Z); 2578 bumpenvloffset = NINE_APPLY_SWIZZLE(c16m2, W); 2579 } 2580 2581 apply_ps1x_projection(tx, texcoord, tx->regs.vT[m], m); 2582 2583 /* u' = TextureCoordinates(stage m)u + D3DTSS_BUMPENVMAT00(stage m)*t(n)R */ 2584 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, 2585 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord)); 2586 /* u' = u' + D3DTSS_BUMPENVMAT10(stage m)*t(n)G */ 2587 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, 2588 NINE_APPLY_SWIZZLE(src, Y), 2589 NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); 2590 2591 /* v' = TextureCoordinates(stage m)v + D3DTSS_BUMPENVMAT01(stage m)*t(n)R */ 2592 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, 2593 NINE_APPLY_SWIZZLE(src, X), ureg_src(texcoord)); 2594 /* v' = v' + D3DTSS_BUMPENVMAT11(stage m)*t(n)G*/ 2595 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, 2596 NINE_APPLY_SWIZZLE(src, Y), 2597 NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); 2598 2599 /* Now the texture coordinates are in tmp.xy */ 2600 2601 if (tx->insn.opcode == D3DSIO_TEXBEM) { 2602 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 2603 } else if (tx->insn.opcode == D3DSIO_TEXBEML) { 2604 /* t(m)RGBA = t(m)RGBA * [(t(n)B * D3DTSS_BUMPENVLSCALE(stage m)) + D3DTSS_BUMPENVLOFFSET(stage m)] */ 2605 ureg_TEX(ureg, tmp, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 2606 ureg_MAD(ureg, tmp2, NINE_APPLY_SWIZZLE(src, Z), 2607 bumpenvlscale, bumpenvloffset); 2608 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_src(tmp2)); 2609 } 2610 2611 tx->info->bumpenvmat_needed = 1; 2612 2613 return D3D_OK; 2614} 2615 2616DECL_SPECIAL(TEXREG2AR) 2617{ 2618 struct ureg_program *ureg = tx->ureg; 2619 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2620 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2621 struct ureg_src sample; 2622 const int m = tx->insn.dst[0].idx; 2623 ASSERTED const int n = tx->insn.src[0].idx; 2624 assert(m >= 0 && m > n); 2625 2626 sample = ureg_DECL_sampler(ureg, m); 2627 tx->info->sampler_mask |= 1 << m; 2628 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(W,X,X,X)), sample); 2629 2630 return D3D_OK; 2631} 2632 2633DECL_SPECIAL(TEXREG2GB) 2634{ 2635 struct ureg_program *ureg = tx->ureg; 2636 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2637 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2638 struct ureg_src sample; 2639 const int m = tx->insn.dst[0].idx; 2640 ASSERTED const int n = tx->insn.src[0].idx; 2641 assert(m >= 0 && m > n); 2642 2643 sample = ureg_DECL_sampler(ureg, m); 2644 tx->info->sampler_mask |= 1 << m; 2645 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_swizzle(src, NINE_SWIZZLE4(Y,Z,Z,Z)), sample); 2646 2647 return D3D_OK; 2648} 2649 2650DECL_SPECIAL(TEXM3x2PAD) 2651{ 2652 return D3D_OK; /* this is just padding */ 2653} 2654 2655DECL_SPECIAL(TEXM3x2TEX) 2656{ 2657 struct ureg_program *ureg = tx->ureg; 2658 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2659 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2660 struct ureg_src sample; 2661 const int m = tx->insn.dst[0].idx - 1; 2662 ASSERTED const int n = tx->insn.src[0].idx; 2663 assert(m >= 0 && m > n); 2664 2665 tx_texcoord_alloc(tx, m); 2666 tx_texcoord_alloc(tx, m+1); 2667 2668 /* performs the matrix multiplication */ 2669 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2670 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 2671 2672 sample = ureg_DECL_sampler(ureg, m + 1); 2673 tx->info->sampler_mask |= 1 << (m + 1); 2674 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 1), ureg_src(dst), sample); 2675 2676 return D3D_OK; 2677} 2678 2679DECL_SPECIAL(TEXM3x3PAD) 2680{ 2681 return D3D_OK; /* this is just padding */ 2682} 2683 2684DECL_SPECIAL(TEXM3x3SPEC) 2685{ 2686 struct ureg_program *ureg = tx->ureg; 2687 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2688 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2689 struct ureg_src E = tx_src_param(tx, &tx->insn.src[1]); 2690 struct ureg_src sample; 2691 struct ureg_dst tmp; 2692 const int m = tx->insn.dst[0].idx - 2; 2693 ASSERTED const int n = tx->insn.src[0].idx; 2694 assert(m >= 0 && m > n); 2695 2696 tx_texcoord_alloc(tx, m); 2697 tx_texcoord_alloc(tx, m+1); 2698 tx_texcoord_alloc(tx, m+2); 2699 2700 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2701 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 2702 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src); 2703 2704 sample = ureg_DECL_sampler(ureg, m + 2); 2705 tx->info->sampler_mask |= 1 << (m + 2); 2706 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); 2707 2708 /* At this step, dst = N = (u', w', z'). 2709 * We want dst to be the texture sampled at (u'', w'', z''), with 2710 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ 2711 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); 2712 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 2713 /* at this step tmp.x = 1/N.N */ 2714 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), E); 2715 /* at this step tmp.y = N.E */ 2716 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 2717 /* at this step tmp.x = N.E/N.N */ 2718 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); 2719 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); 2720 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ 2721 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(E)); 2722 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); 2723 2724 return D3D_OK; 2725} 2726 2727DECL_SPECIAL(TEXREG2RGB) 2728{ 2729 struct ureg_program *ureg = tx->ureg; 2730 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2731 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2732 struct ureg_src sample; 2733 const int m = tx->insn.dst[0].idx; 2734 ASSERTED const int n = tx->insn.src[0].idx; 2735 assert(m >= 0 && m > n); 2736 2737 sample = ureg_DECL_sampler(ureg, m); 2738 tx->info->sampler_mask |= 1 << m; 2739 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), src, sample); 2740 2741 return D3D_OK; 2742} 2743 2744DECL_SPECIAL(TEXDP3TEX) 2745{ 2746 struct ureg_program *ureg = tx->ureg; 2747 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2748 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2749 struct ureg_dst tmp; 2750 struct ureg_src sample; 2751 const int m = tx->insn.dst[0].idx; 2752 ASSERTED const int n = tx->insn.src[0].idx; 2753 assert(m >= 0 && m > n); 2754 2755 tx_texcoord_alloc(tx, m); 2756 2757 tmp = tx_scratch(tx); 2758 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2759 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_YZ), ureg_imm1f(ureg, 0.0f)); 2760 2761 sample = ureg_DECL_sampler(ureg, m); 2762 tx->info->sampler_mask |= 1 << m; 2763 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m), ureg_src(tmp), sample); 2764 2765 return D3D_OK; 2766} 2767 2768DECL_SPECIAL(TEXM3x2DEPTH) 2769{ 2770 struct ureg_program *ureg = tx->ureg; 2771 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2772 struct ureg_dst tmp; 2773 const int m = tx->insn.dst[0].idx - 1; 2774 ASSERTED const int n = tx->insn.src[0].idx; 2775 assert(m >= 0 && m > n); 2776 2777 tx_texcoord_alloc(tx, m); 2778 tx_texcoord_alloc(tx, m+1); 2779 2780 tmp = tx_scratch(tx); 2781 2782 /* performs the matrix multiplication */ 2783 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2784 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 2785 2786 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Z), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 2787 /* tmp.x = 'z', tmp.y = 'w', tmp.z = 1/'w'. */ 2788 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Z)); 2789 /* res = 'w' == 0 ? 1.0 : z/w */ 2790 ureg_CMP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y))), 2791 ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1.0f)); 2792 /* replace the depth for depth testing with the result */ 2793 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, 2794 TGSI_WRITEMASK_Z, 0, 1); 2795 ureg_MOV(ureg, tx->regs.oDepth, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 2796 /* note that we write nothing to the destination, since it's disallowed to use it afterward */ 2797 return D3D_OK; 2798} 2799 2800DECL_SPECIAL(TEXDP3) 2801{ 2802 struct ureg_program *ureg = tx->ureg; 2803 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2804 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2805 const int m = tx->insn.dst[0].idx; 2806 ASSERTED const int n = tx->insn.src[0].idx; 2807 assert(m >= 0 && m > n); 2808 2809 tx_texcoord_alloc(tx, m); 2810 2811 ureg_DP3(ureg, dst, tx->regs.vT[m], src); 2812 2813 return D3D_OK; 2814} 2815 2816DECL_SPECIAL(TEXM3x3) 2817{ 2818 struct ureg_program *ureg = tx->ureg; 2819 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2820 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); /* t[n] */ 2821 struct ureg_src sample; 2822 struct ureg_dst E, tmp; 2823 const int m = tx->insn.dst[0].idx - 2; 2824 ASSERTED const int n = tx->insn.src[0].idx; 2825 assert(m >= 0 && m > n); 2826 2827 tx_texcoord_alloc(tx, m); 2828 tx_texcoord_alloc(tx, m+1); 2829 tx_texcoord_alloc(tx, m+2); 2830 2831 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), tx->regs.vT[m], src); 2832 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), tx->regs.vT[m+1], src); 2833 ureg_DP3(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), tx->regs.vT[m+2], src); 2834 2835 switch (tx->insn.opcode) { 2836 case D3DSIO_TEXM3x3: 2837 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 2838 break; 2839 case D3DSIO_TEXM3x3TEX: 2840 sample = ureg_DECL_sampler(ureg, m + 2); 2841 tx->info->sampler_mask |= 1 << (m + 2); 2842 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(dst), sample); 2843 break; 2844 case D3DSIO_TEXM3x3VSPEC: 2845 sample = ureg_DECL_sampler(ureg, m + 2); 2846 tx->info->sampler_mask |= 1 << (m + 2); 2847 E = tx_scratch(tx); 2848 tmp = ureg_writemask(tx_scratch(tx), TGSI_WRITEMASK_XYZ); 2849 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_X), ureg_scalar(tx->regs.vT[m], TGSI_SWIZZLE_W)); 2850 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Y), ureg_scalar(tx->regs.vT[m+1], TGSI_SWIZZLE_W)); 2851 ureg_MOV(ureg, ureg_writemask(E, TGSI_WRITEMASK_Z), ureg_scalar(tx->regs.vT[m+2], TGSI_SWIZZLE_W)); 2852 /* At this step, dst = N = (u', w', z'). 2853 * We want dst to be the texture sampled at (u'', w'', z''), with 2854 * (u'', w'', z'') = 2 * (N.E / N.N) * N - E */ 2855 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_src(dst), ureg_src(dst)); 2856 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X)); 2857 /* at this step tmp.x = 1/N.N */ 2858 ureg_DP3(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_src(dst), ureg_src(E)); 2859 /* at this step tmp.y = N.E */ 2860 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_Y)); 2861 /* at this step tmp.x = N.E/N.N */ 2862 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_imm1f(ureg, 2.0f)); 2863 ureg_MUL(ureg, tmp, ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X), ureg_src(dst)); 2864 /* at this step tmp.xyz = 2 * (N.E / N.N) * N */ 2865 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_negate(ureg_src(E))); 2866 ureg_TEX(ureg, dst, ps1x_sampler_type(tx->info, m + 2), ureg_src(tmp), sample); 2867 break; 2868 default: 2869 return D3DERR_INVALIDCALL; 2870 } 2871 return D3D_OK; 2872} 2873 2874DECL_SPECIAL(TEXDEPTH) 2875{ 2876 struct ureg_program *ureg = tx->ureg; 2877 struct ureg_dst r5; 2878 struct ureg_src r5r, r5g; 2879 2880 assert(tx->insn.dst[0].idx == 5); /* instruction must get r5 here */ 2881 2882 /* we must replace the depth by r5.g == 0 ? 1.0f : r5.r/r5.g. 2883 * r5 won't be used afterward, thus we can use r5.ba */ 2884 r5 = tx->regs.r[5]; 2885 r5r = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_X); 2886 r5g = ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Y); 2887 2888 ureg_RCP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_Z), r5g); 2889 ureg_MUL(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), r5r, ureg_scalar(ureg_src(r5), TGSI_SWIZZLE_Z)); 2890 /* r5.r = r/g */ 2891 ureg_CMP(ureg, ureg_writemask(r5, TGSI_WRITEMASK_X), ureg_negate(ureg_abs(r5g)), 2892 r5r, ureg_imm1f(ureg, 1.0f)); 2893 /* replace the depth for depth testing with the result */ 2894 tx->regs.oDepth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, 2895 TGSI_WRITEMASK_Z, 0, 1); 2896 ureg_MOV(ureg, tx->regs.oDepth, r5r); 2897 2898 return D3D_OK; 2899} 2900 2901DECL_SPECIAL(BEM) 2902{ 2903 struct ureg_program *ureg = tx->ureg; 2904 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2905 struct ureg_src src0 = tx_src_param(tx, &tx->insn.src[0]); 2906 struct ureg_src src1 = tx_src_param(tx, &tx->insn.src[1]); 2907 struct ureg_src m00, m01, m10, m11, c8m; 2908 const int m = tx->insn.dst[0].idx; 2909 struct ureg_dst tmp = tx_scratch(tx); 2910 /* 2911 * Bump-env-matrix: 2912 * 00 is X 2913 * 01 is Y 2914 * 10 is Z 2915 * 11 is W 2916 */ 2917 c8m = nine_float_constant_src(tx, 8+m); 2918 m00 = NINE_APPLY_SWIZZLE(c8m, X); 2919 m01 = NINE_APPLY_SWIZZLE(c8m, Y); 2920 m10 = NINE_APPLY_SWIZZLE(c8m, Z); 2921 m11 = NINE_APPLY_SWIZZLE(c8m, W); 2922 /* dest.r = src0.r + D3DTSS_BUMPENVMAT00(stage n) * src1.r */ 2923 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m00, 2924 NINE_APPLY_SWIZZLE(src1, X), NINE_APPLY_SWIZZLE(src0, X)); 2925 /* dest.r = dest.r + D3DTSS_BUMPENVMAT10(stage n) * src1.g; */ 2926 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), m10, 2927 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), X)); 2928 2929 /* dest.g = src0.g + D3DTSS_BUMPENVMAT01(stage n) * src1.r */ 2930 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m01, 2931 NINE_APPLY_SWIZZLE(src1, X), src0); 2932 /* dest.g = dest.g + D3DTSS_BUMPENVMAT11(stage n) * src1.g */ 2933 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), m11, 2934 NINE_APPLY_SWIZZLE(src1, Y), NINE_APPLY_SWIZZLE(ureg_src(tmp), Y)); 2935 ureg_MOV(ureg, ureg_writemask(dst, TGSI_WRITEMASK_XY), ureg_src(tmp)); 2936 2937 tx->info->bumpenvmat_needed = 1; 2938 2939 return D3D_OK; 2940} 2941 2942DECL_SPECIAL(TEXLD) 2943{ 2944 struct ureg_program *ureg = tx->ureg; 2945 unsigned target; 2946 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2947 struct ureg_src src[2] = { 2948 tx_src_param(tx, &tx->insn.src[0]), 2949 tx_src_param(tx, &tx->insn.src[1]) 2950 }; 2951 assert(tx->insn.src[1].idx >= 0 && 2952 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 2953 target = tx->sampler_targets[tx->insn.src[1].idx]; 2954 2955 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx)) 2956 return D3D_OK; 2957 2958 switch (tx->insn.flags) { 2959 case 0: 2960 ureg_TEX(ureg, dst, target, src[0], src[1]); 2961 break; 2962 case NINED3DSI_TEXLD_PROJECT: 2963 ureg_TXP(ureg, dst, target, src[0], src[1]); 2964 break; 2965 case NINED3DSI_TEXLD_BIAS: 2966 ureg_TXB(ureg, dst, target, src[0], src[1]); 2967 break; 2968 default: 2969 assert(0); 2970 return D3DERR_INVALIDCALL; 2971 } 2972 return D3D_OK; 2973} 2974 2975DECL_SPECIAL(TEXLD_14) 2976{ 2977 struct ureg_program *ureg = tx->ureg; 2978 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2979 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 2980 const unsigned s = tx->insn.dst[0].idx; 2981 const unsigned t = ps1x_sampler_type(tx->info, s); 2982 2983 tx->info->sampler_mask |= 1 << s; 2984 ureg_TEX(ureg, dst, t, src, ureg_DECL_sampler(ureg, s)); 2985 2986 return D3D_OK; 2987} 2988 2989DECL_SPECIAL(TEX) 2990{ 2991 struct ureg_program *ureg = tx->ureg; 2992 const unsigned s = tx->insn.dst[0].idx; 2993 const unsigned t = ps1x_sampler_type(tx->info, s); 2994 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 2995 struct ureg_src src[2]; 2996 2997 tx_texcoord_alloc(tx, s); 2998 2999 src[0] = tx->regs.vT[s]; 3000 src[1] = ureg_DECL_sampler(ureg, s); 3001 tx->info->sampler_mask |= 1 << s; 3002 3003 TEX_with_ps1x_projection(tx, dst, t, src[0], src[1], s); 3004 3005 return D3D_OK; 3006} 3007 3008DECL_SPECIAL(TEXLDD) 3009{ 3010 unsigned target; 3011 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 3012 struct ureg_src src[4] = { 3013 tx_src_param(tx, &tx->insn.src[0]), 3014 tx_src_param(tx, &tx->insn.src[1]), 3015 tx_src_param(tx, &tx->insn.src[2]), 3016 tx_src_param(tx, &tx->insn.src[3]) 3017 }; 3018 assert(tx->insn.src[1].idx >= 0 && 3019 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 3020 target = tx->sampler_targets[tx->insn.src[1].idx]; 3021 3022 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx)) 3023 return D3D_OK; 3024 3025 ureg_TXD(tx->ureg, dst, target, src[0], src[2], src[3], src[1]); 3026 return D3D_OK; 3027} 3028 3029DECL_SPECIAL(TEXLDL) 3030{ 3031 unsigned target; 3032 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 3033 struct ureg_src src[2] = { 3034 tx_src_param(tx, &tx->insn.src[0]), 3035 tx_src_param(tx, &tx->insn.src[1]) 3036 }; 3037 assert(tx->insn.src[1].idx >= 0 && 3038 tx->insn.src[1].idx < ARRAY_SIZE(tx->sampler_targets)); 3039 target = tx->sampler_targets[tx->insn.src[1].idx]; 3040 3041 if (TEX_if_fetch4(tx, dst, target, src[0], src[1], tx->insn.src[1].idx)) 3042 return D3D_OK; 3043 3044 ureg_TXL(tx->ureg, dst, target, src[0], src[1]); 3045 return D3D_OK; 3046} 3047 3048DECL_SPECIAL(SETP) 3049{ 3050 const unsigned cmp_op = sm1_insn_flags_to_tgsi_setop(tx->insn.flags); 3051 struct ureg_dst dst = tx_dst_param(tx, &tx->insn.dst[0]); 3052 struct ureg_src src[2] = { 3053 tx_src_param(tx, &tx->insn.src[0]), 3054 tx_src_param(tx, &tx->insn.src[1]) 3055 }; 3056 ureg_insn(tx->ureg, cmp_op, &dst, 1, src, 2, 0); 3057 return D3D_OK; 3058} 3059 3060DECL_SPECIAL(BREAKP) 3061{ 3062 struct ureg_src src = tx_src_param(tx, &tx->insn.src[0]); 3063 ureg_IF(tx->ureg, src, tx_cond(tx)); 3064 ureg_BRK(tx->ureg); 3065 tx_endcond(tx); 3066 ureg_ENDIF(tx->ureg); 3067 return D3D_OK; 3068} 3069 3070DECL_SPECIAL(PHASE) 3071{ 3072 return D3D_OK; /* we don't care about phase */ 3073} 3074 3075DECL_SPECIAL(COMMENT) 3076{ 3077 return D3D_OK; /* nothing to do */ 3078} 3079 3080 3081#define _OPI(o,t,vv1,vv2,pv1,pv2,d,s,h) \ 3082 { D3DSIO_##o, TGSI_OPCODE_##t, { vv1, vv2 }, { pv1, pv2, }, d, s, h } 3083 3084static const struct sm1_op_info inst_table[] = 3085{ 3086 _OPI(NOP, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(NOP)), /* 0 */ 3087 _OPI(MOV, MOV, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), 3088 _OPI(ADD, ADD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 2 */ 3089 _OPI(SUB, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(SUB)), /* 3 */ 3090 _OPI(MAD, MAD, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 4 */ 3091 _OPI(MUL, MUL, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 5 */ 3092 _OPI(RCP, RCP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RCP)), /* 6 */ 3093 _OPI(RSQ, RSQ, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(RSQ)), /* 7 */ 3094 _OPI(DP3, DP3, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 8 */ 3095 _OPI(DP4, DP4, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 9 */ 3096 _OPI(MIN, MIN, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 10 */ 3097 _OPI(MAX, MAX, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 11 */ 3098 _OPI(SLT, SLT, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 12 */ 3099 _OPI(SGE, SGE, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 13 */ 3100 _OPI(EXP, EX2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 14 */ 3101 _OPI(LOG, LG2, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(LOG)), /* 15 */ 3102 _OPI(LIT, LIT, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LIT)), /* 16 */ 3103 _OPI(DST, DST, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, NULL), /* 17 */ 3104 _OPI(LRP, LRP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 3, NULL), /* 18 */ 3105 _OPI(FRC, FRC, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, NULL), /* 19 */ 3106 3107 _OPI(M4x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x4)), 3108 _OPI(M4x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M4x3)), 3109 _OPI(M3x4, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x4)), 3110 _OPI(M3x3, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x3)), 3111 _OPI(M3x2, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(M3x2)), 3112 3113 _OPI(CALL, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(CALL)), 3114 _OPI(CALLNZ, CAL, V(2,0), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(CALLNZ)), 3115 _OPI(LOOP, BGNLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 2, SPECIAL(LOOP)), 3116 _OPI(RET, RET, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(RET)), 3117 _OPI(ENDLOOP, ENDLOOP, V(2,0), V(3,0), V(3,0), V(3,0), 0, 0, SPECIAL(ENDLOOP)), 3118 _OPI(LABEL, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(LABEL)), 3119 3120 _OPI(DCL, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(DCL)), 3121 3122 _OPI(POW, POW, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(POW)), 3123 _OPI(CRS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 2, SPECIAL(XPD)), /* XXX: .w */ 3124 _OPI(SGN, SSG, V(2,0), V(3,0), V(0,0), V(0,0), 1, 3, SPECIAL(SGN)), /* ignore src1,2 */ 3125 _OPI(ABS, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(ABS)), 3126 _OPI(NRM, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 1, SPECIAL(NRM)), /* NRM doesn't fit */ 3127 3128 _OPI(SINCOS, NOP, V(2,0), V(2,1), V(2,0), V(2,1), 1, 3, SPECIAL(SINCOS)), 3129 _OPI(SINCOS, NOP, V(3,0), V(3,0), V(3,0), V(3,0), 1, 1, SPECIAL(SINCOS)), 3130 3131 /* More flow control */ 3132 _OPI(REP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(REP)), 3133 _OPI(ENDREP, NOP, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDREP)), 3134 _OPI(IF, IF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(IF)), 3135 _OPI(IFC, IF, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(IFC)), 3136 _OPI(ELSE, ELSE, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ELSE)), 3137 _OPI(ENDIF, ENDIF, V(2,0), V(3,0), V(2,1), V(3,0), 0, 0, SPECIAL(ENDIF)), 3138 _OPI(BREAK, BRK, V(2,1), V(3,0), V(2,1), V(3,0), 0, 0, NULL), 3139 _OPI(BREAKC, NOP, V(2,1), V(3,0), V(2,1), V(3,0), 0, 2, SPECIAL(BREAKC)), 3140 /* we don't write to the address register, but a normal register (copied 3141 * when needed to the address register), thus we don't use ARR */ 3142 _OPI(MOVA, MOV, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), 3143 3144 _OPI(DEFB, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFB)), 3145 _OPI(DEFI, NOP, V(0,0), V(3,0) , V(0,0), V(3,0) , 1, 0, SPECIAL(DEFI)), 3146 3147 _OPI(TEXCOORD, NOP, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEXCOORD)), 3148 _OPI(TEXCOORD, MOV, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXCOORD_ps14)), 3149 _OPI(TEXKILL, KILL_IF, V(0,0), V(0,0), V(0,0), V(3,0), 1, 0, SPECIAL(TEXKILL)), 3150 _OPI(TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 0, SPECIAL(TEX)), 3151 _OPI(TEX, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 1, SPECIAL(TEXLD_14)), 3152 _OPI(TEX, TEX, V(0,0), V(0,0), V(2,0), V(3,0), 1, 2, SPECIAL(TEXLD)), 3153 _OPI(TEXBEM, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), 3154 _OPI(TEXBEML, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXBEM)), 3155 _OPI(TEXREG2AR, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2AR)), 3156 _OPI(TEXREG2GB, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXREG2GB)), 3157 _OPI(TEXM3x2PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2PAD)), 3158 _OPI(TEXM3x2TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x2TEX)), 3159 _OPI(TEXM3x3PAD, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3PAD)), 3160 _OPI(TEXM3x3TEX, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 3161 _OPI(TEXM3x3SPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 2, SPECIAL(TEXM3x3SPEC)), 3162 _OPI(TEXM3x3VSPEC, TEX, V(0,0), V(0,0), V(0,0), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 3163 3164 _OPI(EXPP, EXP, V(0,0), V(1,1), V(0,0), V(0,0), 1, 1, NULL), 3165 _OPI(EXPP, EX2, V(2,0), V(3,0), V(0,0), V(0,0), 1, 1, NULL), 3166 _OPI(LOGP, LG2, V(0,0), V(3,0), V(0,0), V(0,0), 1, 1, SPECIAL(LOG)), 3167 _OPI(CND, NOP, V(0,0), V(0,0), V(0,0), V(1,4), 1, 3, SPECIAL(CND)), 3168 3169 _OPI(DEF, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 1, 0, SPECIAL(DEF)), 3170 3171 /* More tex stuff */ 3172 _OPI(TEXREG2RGB, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXREG2RGB)), 3173 _OPI(TEXDP3TEX, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3TEX)), 3174 _OPI(TEXM3x2DEPTH, TEX, V(0,0), V(0,0), V(1,3), V(1,3), 1, 1, SPECIAL(TEXM3x2DEPTH)), 3175 _OPI(TEXDP3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXDP3)), 3176 _OPI(TEXM3x3, TEX, V(0,0), V(0,0), V(1,2), V(1,3), 1, 1, SPECIAL(TEXM3x3)), 3177 _OPI(TEXDEPTH, TEX, V(0,0), V(0,0), V(1,4), V(1,4), 1, 0, SPECIAL(TEXDEPTH)), 3178 3179 /* Misc */ 3180 _OPI(CMP, CMP, V(0,0), V(0,0), V(1,2), V(3,0), 1, 3, SPECIAL(CMP)), /* reversed */ 3181 _OPI(BEM, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 1, 2, SPECIAL(BEM)), 3182 _OPI(DP2ADD, NOP, V(0,0), V(0,0), V(2,0), V(3,0), 1, 3, SPECIAL(DP2ADD)), 3183 _OPI(DSX, DDX, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), 3184 _OPI(DSY, DDY, V(0,0), V(0,0), V(2,1), V(3,0), 1, 1, NULL), 3185 _OPI(TEXLDD, TXD, V(0,0), V(0,0), V(2,1), V(3,0), 1, 4, SPECIAL(TEXLDD)), 3186 _OPI(SETP, NOP, V(0,0), V(3,0), V(2,1), V(3,0), 1, 2, SPECIAL(SETP)), 3187 _OPI(TEXLDL, TXL, V(3,0), V(3,0), V(3,0), V(3,0), 1, 2, SPECIAL(TEXLDL)), 3188 _OPI(BREAKP, BRK, V(0,0), V(3,0), V(2,1), V(3,0), 0, 1, SPECIAL(BREAKP)) 3189}; 3190 3191static const struct sm1_op_info inst_phase = 3192 _OPI(PHASE, NOP, V(0,0), V(0,0), V(1,4), V(1,4), 0, 0, SPECIAL(PHASE)); 3193 3194static const struct sm1_op_info inst_comment = 3195 _OPI(COMMENT, NOP, V(0,0), V(3,0), V(0,0), V(3,0), 0, 0, SPECIAL(COMMENT)); 3196 3197static void 3198create_op_info_map(struct shader_translator *tx) 3199{ 3200 const unsigned version = (tx->version.major << 8) | tx->version.minor; 3201 unsigned i; 3202 3203 for (i = 0; i < ARRAY_SIZE(tx->op_info_map); ++i) 3204 tx->op_info_map[i] = -1; 3205 3206 if (tx->processor == PIPE_SHADER_VERTEX) { 3207 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { 3208 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); 3209 if (inst_table[i].vert_version.min <= version && 3210 inst_table[i].vert_version.max >= version) 3211 tx->op_info_map[inst_table[i].sio] = i; 3212 } 3213 } else { 3214 for (i = 0; i < ARRAY_SIZE(inst_table); ++i) { 3215 assert(inst_table[i].sio < ARRAY_SIZE(tx->op_info_map)); 3216 if (inst_table[i].frag_version.min <= version && 3217 inst_table[i].frag_version.max >= version) 3218 tx->op_info_map[inst_table[i].sio] = i; 3219 } 3220 } 3221} 3222 3223static inline HRESULT 3224NineTranslateInstruction_Generic(struct shader_translator *tx) 3225{ 3226 struct ureg_dst dst[1]; 3227 struct ureg_src src[4]; 3228 unsigned i; 3229 3230 for (i = 0; i < tx->insn.ndst && i < ARRAY_SIZE(dst); ++i) 3231 dst[i] = tx_dst_param(tx, &tx->insn.dst[i]); 3232 for (i = 0; i < tx->insn.nsrc && i < ARRAY_SIZE(src); ++i) 3233 src[i] = tx_src_param(tx, &tx->insn.src[i]); 3234 3235 ureg_insn(tx->ureg, tx->insn.info->opcode, 3236 dst, tx->insn.ndst, 3237 src, tx->insn.nsrc, 0); 3238 return D3D_OK; 3239} 3240 3241static inline DWORD 3242TOKEN_PEEK(struct shader_translator *tx) 3243{ 3244 return *(tx->parse); 3245} 3246 3247static inline DWORD 3248TOKEN_NEXT(struct shader_translator *tx) 3249{ 3250 return *(tx->parse)++; 3251} 3252 3253static inline void 3254TOKEN_JUMP(struct shader_translator *tx) 3255{ 3256 if (tx->parse_next && tx->parse != tx->parse_next) { 3257 WARN("parse(%p) != parse_next(%p) !\n", tx->parse, tx->parse_next); 3258 tx->parse = tx->parse_next; 3259 } 3260} 3261 3262static inline boolean 3263sm1_parse_eof(struct shader_translator *tx) 3264{ 3265 return TOKEN_PEEK(tx) == NINED3DSP_END; 3266} 3267 3268static void 3269sm1_read_version(struct shader_translator *tx) 3270{ 3271 const DWORD tok = TOKEN_NEXT(tx); 3272 3273 tx->version.major = D3DSHADER_VERSION_MAJOR(tok); 3274 tx->version.minor = D3DSHADER_VERSION_MINOR(tok); 3275 3276 switch (tok >> 16) { 3277 case NINED3D_SM1_VS: tx->processor = PIPE_SHADER_VERTEX; break; 3278 case NINED3D_SM1_PS: tx->processor = PIPE_SHADER_FRAGMENT; break; 3279 default: 3280 DBG("Invalid shader type: %x\n", tok); 3281 tx->processor = ~0; 3282 break; 3283 } 3284} 3285 3286/* This is just to check if we parsed the instruction properly. */ 3287static void 3288sm1_parse_get_skip(struct shader_translator *tx) 3289{ 3290 const DWORD tok = TOKEN_PEEK(tx); 3291 3292 if (tx->version.major >= 2) { 3293 tx->parse_next = tx->parse + 1 /* this */ + 3294 ((tok & D3DSI_INSTLENGTH_MASK) >> D3DSI_INSTLENGTH_SHIFT); 3295 } else { 3296 tx->parse_next = NULL; /* TODO: determine from param count */ 3297 } 3298} 3299 3300static void 3301sm1_print_comment(const char *comment, UINT size) 3302{ 3303 if (!size) 3304 return; 3305 /* TODO */ 3306} 3307 3308static void 3309sm1_parse_comments(struct shader_translator *tx, BOOL print) 3310{ 3311 DWORD tok = TOKEN_PEEK(tx); 3312 3313 while ((tok & D3DSI_OPCODE_MASK) == D3DSIO_COMMENT) 3314 { 3315 const char *comment = ""; 3316 UINT size = (tok & D3DSI_COMMENTSIZE_MASK) >> D3DSI_COMMENTSIZE_SHIFT; 3317 tx->parse += size + 1; 3318 3319 if (print) 3320 sm1_print_comment(comment, size); 3321 3322 tok = TOKEN_PEEK(tx); 3323 } 3324} 3325 3326static void 3327sm1_parse_get_param(struct shader_translator *tx, DWORD *reg, DWORD *rel) 3328{ 3329 *reg = TOKEN_NEXT(tx); 3330 3331 if (*reg & D3DSHADER_ADDRMODE_RELATIVE) 3332 { 3333 if (tx->version.major < 2) 3334 *rel = (1 << 31) | 3335 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2) | 3336 ((D3DSPR_ADDR << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) | 3337 D3DSP_NOSWIZZLE; 3338 else 3339 *rel = TOKEN_NEXT(tx); 3340 } 3341} 3342 3343static void 3344sm1_parse_dst_param(struct sm1_dst_param *dst, DWORD tok) 3345{ 3346 int8_t shift; 3347 dst->file = 3348 (tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT | 3349 (tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2; 3350 dst->type = TGSI_RETURN_TYPE_FLOAT; 3351 dst->idx = tok & D3DSP_REGNUM_MASK; 3352 dst->rel = NULL; 3353 dst->mask = (tok & NINED3DSP_WRITEMASK_MASK) >> NINED3DSP_WRITEMASK_SHIFT; 3354 dst->mod = (tok & D3DSP_DSTMOD_MASK) >> D3DSP_DSTMOD_SHIFT; 3355 shift = (tok & D3DSP_DSTSHIFT_MASK) >> D3DSP_DSTSHIFT_SHIFT; 3356 dst->shift = (shift & 0x7) - (shift & 0x8); 3357} 3358 3359static void 3360sm1_parse_src_param(struct sm1_src_param *src, DWORD tok) 3361{ 3362 src->file = 3363 ((tok & D3DSP_REGTYPE_MASK) >> D3DSP_REGTYPE_SHIFT) | 3364 ((tok & D3DSP_REGTYPE_MASK2) >> D3DSP_REGTYPE_SHIFT2); 3365 src->type = TGSI_RETURN_TYPE_FLOAT; 3366 src->idx = tok & D3DSP_REGNUM_MASK; 3367 src->rel = NULL; 3368 src->swizzle = (tok & D3DSP_SWIZZLE_MASK) >> D3DSP_SWIZZLE_SHIFT; 3369 src->mod = (tok & D3DSP_SRCMOD_MASK) >> D3DSP_SRCMOD_SHIFT; 3370 3371 switch (src->file) { 3372 case D3DSPR_CONST2: src->file = D3DSPR_CONST; src->idx += 2048; break; 3373 case D3DSPR_CONST3: src->file = D3DSPR_CONST; src->idx += 4096; break; 3374 case D3DSPR_CONST4: src->file = D3DSPR_CONST; src->idx += 6144; break; 3375 default: 3376 break; 3377 } 3378} 3379 3380static void 3381sm1_parse_immediate(struct shader_translator *tx, 3382 struct sm1_src_param *imm) 3383{ 3384 imm->file = NINED3DSPR_IMMEDIATE; 3385 imm->idx = INT_MIN; 3386 imm->rel = NULL; 3387 imm->swizzle = NINED3DSP_NOSWIZZLE; 3388 imm->mod = 0; 3389 switch (tx->insn.opcode) { 3390 case D3DSIO_DEF: 3391 imm->type = NINED3DSPTYPE_FLOAT4; 3392 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); 3393 tx->parse += 4; 3394 break; 3395 case D3DSIO_DEFI: 3396 imm->type = NINED3DSPTYPE_INT4; 3397 memcpy(&imm->imm.d[0], tx->parse, 4 * sizeof(DWORD)); 3398 tx->parse += 4; 3399 break; 3400 case D3DSIO_DEFB: 3401 imm->type = NINED3DSPTYPE_BOOL; 3402 memcpy(&imm->imm.d[0], tx->parse, 1 * sizeof(DWORD)); 3403 tx->parse += 1; 3404 break; 3405 default: 3406 assert(0); 3407 break; 3408 } 3409} 3410 3411static void 3412sm1_read_dst_param(struct shader_translator *tx, 3413 struct sm1_dst_param *dst, 3414 struct sm1_src_param *rel) 3415{ 3416 DWORD tok_dst, tok_rel = 0; 3417 3418 sm1_parse_get_param(tx, &tok_dst, &tok_rel); 3419 sm1_parse_dst_param(dst, tok_dst); 3420 if (tok_dst & D3DSHADER_ADDRMODE_RELATIVE) { 3421 sm1_parse_src_param(rel, tok_rel); 3422 dst->rel = rel; 3423 } 3424} 3425 3426static void 3427sm1_read_src_param(struct shader_translator *tx, 3428 struct sm1_src_param *src, 3429 struct sm1_src_param *rel) 3430{ 3431 DWORD tok_src, tok_rel = 0; 3432 3433 sm1_parse_get_param(tx, &tok_src, &tok_rel); 3434 sm1_parse_src_param(src, tok_src); 3435 if (tok_src & D3DSHADER_ADDRMODE_RELATIVE) { 3436 assert(rel); 3437 sm1_parse_src_param(rel, tok_rel); 3438 src->rel = rel; 3439 } 3440} 3441 3442static void 3443sm1_read_semantic(struct shader_translator *tx, 3444 struct sm1_semantic *sem) 3445{ 3446 const DWORD tok_usg = TOKEN_NEXT(tx); 3447 const DWORD tok_dst = TOKEN_NEXT(tx); 3448 3449 sem->sampler_type = (tok_usg & D3DSP_TEXTURETYPE_MASK) >> D3DSP_TEXTURETYPE_SHIFT; 3450 sem->usage = (tok_usg & D3DSP_DCL_USAGE_MASK) >> D3DSP_DCL_USAGE_SHIFT; 3451 sem->usage_idx = (tok_usg & D3DSP_DCL_USAGEINDEX_MASK) >> D3DSP_DCL_USAGEINDEX_SHIFT; 3452 3453 sm1_parse_dst_param(&sem->reg, tok_dst); 3454} 3455 3456static void 3457sm1_parse_instruction(struct shader_translator *tx) 3458{ 3459 struct sm1_instruction *insn = &tx->insn; 3460 HRESULT hr; 3461 DWORD tok; 3462 const struct sm1_op_info *info = NULL; 3463 unsigned i; 3464 3465 sm1_parse_comments(tx, TRUE); 3466 sm1_parse_get_skip(tx); 3467 3468 tok = TOKEN_NEXT(tx); 3469 3470 insn->opcode = tok & D3DSI_OPCODE_MASK; 3471 insn->flags = (tok & NINED3DSIO_OPCODE_FLAGS_MASK) >> NINED3DSIO_OPCODE_FLAGS_SHIFT; 3472 insn->coissue = !!(tok & D3DSI_COISSUE); 3473 insn->predicated = !!(tok & NINED3DSHADER_INST_PREDICATED); 3474 3475 if (insn->opcode < ARRAY_SIZE(tx->op_info_map)) { 3476 int k = tx->op_info_map[insn->opcode]; 3477 if (k >= 0) { 3478 assert(k < ARRAY_SIZE(inst_table)); 3479 info = &inst_table[k]; 3480 } 3481 } else { 3482 if (insn->opcode == D3DSIO_PHASE) info = &inst_phase; 3483 if (insn->opcode == D3DSIO_COMMENT) info = &inst_comment; 3484 } 3485 if (!info) { 3486 DBG("illegal or unhandled opcode: %08x\n", insn->opcode); 3487 TOKEN_JUMP(tx); 3488 return; 3489 } 3490 insn->info = info; 3491 insn->ndst = info->ndst; 3492 insn->nsrc = info->nsrc; 3493 3494 /* check version */ 3495 { 3496 unsigned min = IS_VS ? info->vert_version.min : info->frag_version.min; 3497 unsigned max = IS_VS ? info->vert_version.max : info->frag_version.max; 3498 unsigned ver = (tx->version.major << 8) | tx->version.minor; 3499 if (ver < min || ver > max) { 3500 DBG("opcode not supported in this shader version: %x <= %x <= %x\n", 3501 min, ver, max); 3502 return; 3503 } 3504 } 3505 3506 for (i = 0; i < insn->ndst; ++i) 3507 sm1_read_dst_param(tx, &insn->dst[i], &insn->dst_rel[i]); 3508 if (insn->predicated) 3509 sm1_read_src_param(tx, &insn->pred, NULL); 3510 for (i = 0; i < insn->nsrc; ++i) 3511 sm1_read_src_param(tx, &insn->src[i], &insn->src_rel[i]); 3512 3513 /* parse here so we can dump them before processing */ 3514 if (insn->opcode == D3DSIO_DEF || 3515 insn->opcode == D3DSIO_DEFI || 3516 insn->opcode == D3DSIO_DEFB) 3517 sm1_parse_immediate(tx, &tx->insn.src[0]); 3518 3519 sm1_dump_instruction(insn, tx->cond_depth + tx->loop_depth); 3520 sm1_instruction_check(insn); 3521 3522 if (insn->predicated) { 3523 tx->predicated_activated = true; 3524 if (ureg_dst_is_undef(tx->regs.predicate_tmp)) { 3525 tx->regs.predicate_tmp = ureg_DECL_temporary(tx->ureg); 3526 tx->regs.predicate_dst = ureg_DECL_temporary(tx->ureg); 3527 } 3528 } 3529 3530 if (info->handler) 3531 hr = info->handler(tx); 3532 else 3533 hr = NineTranslateInstruction_Generic(tx); 3534 tx_apply_dst0_modifiers(tx); 3535 3536 if (insn->predicated) { 3537 tx->predicated_activated = false; 3538 /* TODO: predicate might be allowed on outputs, 3539 * which cannot be src. Workaround it. */ 3540 ureg_CMP(tx->ureg, tx->regs.predicate_dst, 3541 ureg_negate(tx_src_param(tx, &insn->pred)), 3542 ureg_src(tx->regs.predicate_tmp), 3543 ureg_src(tx->regs.predicate_dst)); 3544 } 3545 3546 if (hr != D3D_OK) 3547 tx->failure = TRUE; 3548 tx->num_scratch = 0; /* reset */ 3549 3550 TOKEN_JUMP(tx); 3551} 3552 3553#define GET_CAP(n) screen->get_param( \ 3554 screen, PIPE_CAP_##n) 3555#define GET_SHADER_CAP(n) screen->get_shader_param( \ 3556 screen, info->type, PIPE_SHADER_CAP_##n) 3557 3558static HRESULT 3559tx_ctor(struct shader_translator *tx, struct pipe_screen *screen, struct nine_shader_info *info) 3560{ 3561 unsigned i; 3562 3563 memset(tx, 0, sizeof(*tx)); 3564 3565 tx->info = info; 3566 3567 tx->byte_code = info->byte_code; 3568 tx->parse = info->byte_code; 3569 3570 for (i = 0; i < ARRAY_SIZE(info->input_map); ++i) 3571 info->input_map[i] = NINE_DECLUSAGE_NONE; 3572 info->num_inputs = 0; 3573 3574 info->position_t = FALSE; 3575 info->point_size = FALSE; 3576 3577 memset(tx->slots_used, 0, sizeof(tx->slots_used)); 3578 memset(info->int_slots_used, 0, sizeof(info->int_slots_used)); 3579 memset(info->bool_slots_used, 0, sizeof(info->bool_slots_used)); 3580 3581 tx->info->const_float_slots = 0; 3582 tx->info->const_int_slots = 0; 3583 tx->info->const_bool_slots = 0; 3584 3585 info->sampler_mask = 0x0; 3586 info->rt_mask = 0x0; 3587 3588 info->lconstf.data = NULL; 3589 info->lconstf.ranges = NULL; 3590 3591 info->bumpenvmat_needed = 0; 3592 3593 for (i = 0; i < ARRAY_SIZE(tx->regs.rL); ++i) { 3594 tx->regs.rL[i] = ureg_dst_undef(); 3595 } 3596 tx->regs.address = ureg_dst_undef(); 3597 tx->regs.a0 = ureg_dst_undef(); 3598 tx->regs.p = ureg_dst_undef(); 3599 tx->regs.oDepth = ureg_dst_undef(); 3600 tx->regs.vPos = ureg_src_undef(); 3601 tx->regs.vFace = ureg_src_undef(); 3602 for (i = 0; i < ARRAY_SIZE(tx->regs.o); ++i) 3603 tx->regs.o[i] = ureg_dst_undef(); 3604 for (i = 0; i < ARRAY_SIZE(tx->regs.oCol); ++i) 3605 tx->regs.oCol[i] = ureg_dst_undef(); 3606 for (i = 0; i < ARRAY_SIZE(tx->regs.vC); ++i) 3607 tx->regs.vC[i] = ureg_src_undef(); 3608 for (i = 0; i < ARRAY_SIZE(tx->regs.vT); ++i) 3609 tx->regs.vT[i] = ureg_src_undef(); 3610 3611 sm1_read_version(tx); 3612 3613 info->version = (tx->version.major << 4) | tx->version.minor; 3614 3615 tx->num_outputs = 0; 3616 3617 create_op_info_map(tx); 3618 3619 tx->ureg = ureg_create(info->type); 3620 if (!tx->ureg) { 3621 return E_OUTOFMEMORY; 3622 } 3623 3624 tx->native_integers = GET_SHADER_CAP(INTEGERS); 3625 tx->inline_subroutines = !GET_SHADER_CAP(SUBROUTINES); 3626 tx->want_texcoord = GET_CAP(TGSI_TEXCOORD); 3627 tx->shift_wpos = !GET_CAP(FS_COORD_PIXEL_CENTER_INTEGER); 3628 tx->texcoord_sn = tx->want_texcoord ? 3629 TGSI_SEMANTIC_TEXCOORD : TGSI_SEMANTIC_GENERIC; 3630 tx->wpos_is_sysval = GET_CAP(FS_POSITION_IS_SYSVAL); 3631 tx->face_is_sysval_integer = GET_CAP(FS_FACE_IS_INTEGER_SYSVAL); 3632 3633 if (IS_VS) { 3634 tx->num_constf_allowed = NINE_MAX_CONST_F; 3635 } else if (tx->version.major < 2) {/* IS_PS v1 */ 3636 tx->num_constf_allowed = 8; 3637 } else if (tx->version.major == 2) {/* IS_PS v2 */ 3638 tx->num_constf_allowed = 32; 3639 } else {/* IS_PS v3 */ 3640 tx->num_constf_allowed = NINE_MAX_CONST_F_PS3; 3641 } 3642 3643 if (tx->version.major < 2) { 3644 tx->num_consti_allowed = 0; 3645 tx->num_constb_allowed = 0; 3646 } else { 3647 tx->num_consti_allowed = NINE_MAX_CONST_I; 3648 tx->num_constb_allowed = NINE_MAX_CONST_B; 3649 } 3650 3651 if (info->swvp_on) { 3652 /* TODO: The values tx->version.major == 1 */ 3653 tx->num_constf_allowed = 8192; 3654 tx->num_consti_allowed = 2048; 3655 tx->num_constb_allowed = 2048; 3656 } 3657 3658 /* VS must always write position. Declare it here to make it the 1st output. 3659 * (Some drivers like nv50 are buggy and rely on that.) 3660 */ 3661 if (IS_VS) { 3662 tx->regs.oPos = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_POSITION, 0); 3663 } else { 3664 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT); 3665 if (!tx->shift_wpos) 3666 ureg_property(tx->ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 3667 } 3668 3669 tx->mul_zero_wins = GET_CAP(LEGACY_MATH_RULES); 3670 if (tx->mul_zero_wins) 3671 ureg_property(tx->ureg, TGSI_PROPERTY_LEGACY_MATH_RULES, 1); 3672 3673 /* Add additional definition of constants */ 3674 if (info->add_constants_defs.c_combination) { 3675 unsigned i; 3676 3677 assert(info->add_constants_defs.int_const_added); 3678 assert(info->add_constants_defs.bool_const_added); 3679 /* We only add constants that are used by the shader 3680 * and that are not defined in the shader */ 3681 for (i = 0; i < NINE_MAX_CONST_I; ++i) { 3682 if ((*info->add_constants_defs.int_const_added)[i]) { 3683 DBG("Defining const i%i : { %i %i %i %i }\n", i, 3684 info->add_constants_defs.c_combination->const_i[i][0], 3685 info->add_constants_defs.c_combination->const_i[i][1], 3686 info->add_constants_defs.c_combination->const_i[i][2], 3687 info->add_constants_defs.c_combination->const_i[i][3]); 3688 tx_set_lconsti(tx, i, info->add_constants_defs.c_combination->const_i[i]); 3689 } 3690 } 3691 for (i = 0; i < NINE_MAX_CONST_B; ++i) { 3692 if ((*info->add_constants_defs.bool_const_added)[i]) { 3693 DBG("Defining const b%i : %i\n", i, (int)(info->add_constants_defs.c_combination->const_b[i] != 0)); 3694 tx_set_lconstb(tx, i, info->add_constants_defs.c_combination->const_b[i]); 3695 } 3696 } 3697 } 3698 return D3D_OK; 3699} 3700 3701static void 3702tx_dtor(struct shader_translator *tx) 3703{ 3704 if (tx->slot_map) 3705 FREE(tx->slot_map); 3706 if (tx->num_inst_labels) 3707 FREE(tx->inst_labels); 3708 FREE(tx->lconstf); 3709 FREE(tx->regs.r); 3710 FREE(tx); 3711} 3712 3713/* CONST[0].xyz = width/2, -height/2, zmax-zmin 3714 * CONST[1].xyz = x+width/2, y+height/2, zmin */ 3715static void 3716shader_add_vs_viewport_transform(struct shader_translator *tx) 3717{ 3718 struct ureg_program *ureg = tx->ureg; 3719 struct ureg_src c0 = ureg_src_register(TGSI_FILE_CONSTANT, 0); 3720 struct ureg_src c1 = ureg_src_register(TGSI_FILE_CONSTANT, 1); 3721 /* struct ureg_dst pos_tmp = ureg_DECL_temporary(ureg);*/ 3722 3723 c0 = ureg_src_dimension(c0, 4); 3724 c1 = ureg_src_dimension(c1, 4); 3725 /* TODO: find out when we need to apply the viewport transformation or not. 3726 * Likely will be XYZ vs XYZRHW in vdecl_out 3727 * ureg_MUL(ureg, ureg_writemask(pos_tmp, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos), c0); 3728 * ureg_ADD(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(pos_tmp), c1); 3729 */ 3730 ureg_MOV(ureg, ureg_writemask(tx->regs.oPos_out, TGSI_WRITEMASK_XYZ), ureg_src(tx->regs.oPos)); 3731} 3732 3733static void 3734shader_add_ps_fog_stage(struct shader_translator *tx, struct ureg_src src_col) 3735{ 3736 struct ureg_program *ureg = tx->ureg; 3737 struct ureg_dst oCol0 = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 3738 struct ureg_src fog_end, fog_coeff, fog_density, fog_params; 3739 struct ureg_src fog_vs, fog_color; 3740 struct ureg_dst fog_factor, depth; 3741 3742 if (!tx->info->fog_enable) { 3743 ureg_MOV(ureg, oCol0, src_col); 3744 return; 3745 } 3746 3747 if (tx->info->fog_mode != D3DFOG_NONE) { 3748 depth = tx_scratch_scalar(tx); 3749 /* Depth used for fog is perspective interpolated */ 3750 ureg_RCP(ureg, depth, ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_W)); 3751 ureg_MUL(ureg, depth, ureg_src(depth), ureg_scalar(nine_get_position_input(tx), TGSI_SWIZZLE_Z)); 3752 } 3753 3754 fog_color = nine_float_constant_src(tx, 32); 3755 fog_params = nine_float_constant_src(tx, 33); 3756 fog_factor = tx_scratch_scalar(tx); 3757 3758 if (tx->info->fog_mode == D3DFOG_LINEAR) { 3759 fog_end = NINE_APPLY_SWIZZLE(fog_params, X); 3760 fog_coeff = NINE_APPLY_SWIZZLE(fog_params, Y); 3761 ureg_ADD(ureg, fog_factor, fog_end, ureg_negate(ureg_src(depth))); 3762 ureg_MUL(ureg, ureg_saturate(fog_factor), tx_src_scalar(fog_factor), fog_coeff); 3763 } else if (tx->info->fog_mode == D3DFOG_EXP) { 3764 fog_density = NINE_APPLY_SWIZZLE(fog_params, X); 3765 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density); 3766 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); 3767 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); 3768 } else if (tx->info->fog_mode == D3DFOG_EXP2) { 3769 fog_density = NINE_APPLY_SWIZZLE(fog_params, X); 3770 ureg_MUL(ureg, fog_factor, ureg_src(depth), fog_density); 3771 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), tx_src_scalar(fog_factor)); 3772 ureg_MUL(ureg, fog_factor, tx_src_scalar(fog_factor), ureg_imm1f(ureg, -1.442695f)); 3773 ureg_EX2(ureg, fog_factor, tx_src_scalar(fog_factor)); 3774 } else { 3775 fog_vs = ureg_scalar(ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, 3776 TGSI_INTERPOLATE_PERSPECTIVE), 3777 TGSI_SWIZZLE_X); 3778 ureg_MOV(ureg, fog_factor, fog_vs); 3779 } 3780 3781 ureg_LRP(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_XYZ), 3782 tx_src_scalar(fog_factor), src_col, fog_color); 3783 ureg_MOV(ureg, ureg_writemask(oCol0, TGSI_WRITEMASK_W), src_col); 3784} 3785 3786static void parse_shader(struct shader_translator *tx) 3787{ 3788 struct nine_shader_info *info = tx->info; 3789 3790 while (!sm1_parse_eof(tx) && !tx->failure) 3791 sm1_parse_instruction(tx); 3792 tx->parse++; /* for byte_size */ 3793 3794 if (tx->failure) 3795 return; 3796 3797 if (IS_PS && tx->version.major < 3) { 3798 if (tx->version.major < 2) { 3799 assert(tx->num_temp); /* there must be color output */ 3800 info->rt_mask |= 0x1; 3801 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.r[0])); 3802 } else { 3803 shader_add_ps_fog_stage(tx, ureg_src(tx->regs.oCol[0])); 3804 } 3805 } 3806 3807 if (IS_VS && tx->version.major < 3 && ureg_dst_is_undef(tx->regs.oFog) && info->fog_enable) { 3808 tx->regs.oFog = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_GENERIC, 16); 3809 ureg_MOV(tx->ureg, ureg_writemask(tx->regs.oFog, TGSI_WRITEMASK_X), ureg_imm1f(tx->ureg, 0.0f)); 3810 } 3811 3812 if (info->position_t) 3813 ureg_property(tx->ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); 3814 3815 if (IS_VS && !ureg_dst_is_undef(tx->regs.oPts)) { 3816 struct ureg_dst oPts = ureg_DECL_output(tx->ureg, TGSI_SEMANTIC_PSIZE, 0); 3817 ureg_MAX(tx->ureg, tx->regs.oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_min)); 3818 ureg_MIN(tx->ureg, oPts, ureg_src(tx->regs.oPts), ureg_imm1f(tx->ureg, info->point_size_max)); 3819 info->point_size = TRUE; 3820 } 3821 3822 if (info->process_vertices) 3823 shader_add_vs_viewport_transform(tx); 3824 3825 ureg_END(tx->ureg); 3826} 3827 3828#define NINE_SHADER_DEBUG_OPTION_NIR_VS (1 << 0) 3829#define NINE_SHADER_DEBUG_OPTION_NIR_PS (1 << 1) 3830#define NINE_SHADER_DEBUG_OPTION_NO_NIR_VS (1 << 2) 3831#define NINE_SHADER_DEBUG_OPTION_NO_NIR_PS (1 << 3) 3832#define NINE_SHADER_DEBUG_OPTION_DUMP_NIR (1 << 4) 3833#define NINE_SHADER_DEBUG_OPTION_DUMP_TGSI (1 << 5) 3834 3835static const struct debug_named_value nine_shader_debug_options[] = { 3836 { "nir_vs", NINE_SHADER_DEBUG_OPTION_NIR_VS, "Use NIR for vertex shaders even if the driver doesn't prefer it." }, 3837 { "nir_ps", NINE_SHADER_DEBUG_OPTION_NIR_PS, "Use NIR for pixel shaders even if the driver doesn't prefer it." }, 3838 { "no_nir_vs", NINE_SHADER_DEBUG_OPTION_NO_NIR_VS, "Never use NIR for vertex shaders even if the driver prefers it." }, 3839 { "no_nir_ps", NINE_SHADER_DEBUG_OPTION_NO_NIR_PS, "Never use NIR for pixel shaders even if the driver prefers it." }, 3840 { "dump_nir", NINE_SHADER_DEBUG_OPTION_DUMP_NIR, "Print translated NIR shaders." }, 3841 { "dump_tgsi", NINE_SHADER_DEBUG_OPTION_DUMP_TGSI, "Print TGSI shaders." }, 3842 DEBUG_NAMED_VALUE_END /* must be last */ 3843}; 3844 3845static inline boolean 3846nine_shader_get_debug_flag(uint64_t flag) 3847{ 3848 static uint64_t flags = 0; 3849 static boolean first_run = TRUE; 3850 3851 if (unlikely(first_run)) { 3852 first_run = FALSE; 3853 flags = debug_get_flags_option("NINE_SHADER", nine_shader_debug_options, 0); 3854 3855 // Check old TGSI dump envvar too 3856 if (debug_get_bool_option("NINE_TGSI_DUMP", FALSE)) { 3857 flags |= NINE_SHADER_DEBUG_OPTION_DUMP_TGSI; 3858 } 3859 } 3860 3861 return !!(flags & flag); 3862} 3863 3864static void 3865nine_pipe_nir_shader_state_from_tgsi(struct pipe_shader_state *state, const struct tgsi_token *tgsi_tokens, 3866 struct pipe_screen *screen) 3867{ 3868 struct nir_shader *nir = tgsi_to_nir(tgsi_tokens, screen, screen->get_disk_shader_cache != NULL); 3869 3870 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_NIR))) { 3871 nir_print_shader(nir, stdout); 3872 } 3873 3874 state->type = PIPE_SHADER_IR_NIR; 3875 state->tokens = NULL; 3876 state->ir.nir = nir; 3877 memset(&state->stream_output, 0, sizeof(state->stream_output)); 3878} 3879 3880static void * 3881nine_ureg_create_shader(struct ureg_program *ureg, 3882 struct pipe_context *pipe, 3883 const struct pipe_stream_output_info *so) 3884{ 3885 struct pipe_shader_state state; 3886 const struct tgsi_token *tgsi_tokens; 3887 struct pipe_screen *screen = pipe->screen; 3888 3889 tgsi_tokens = ureg_finalize(ureg); 3890 if (!tgsi_tokens) 3891 return NULL; 3892 3893 assert(((struct tgsi_header *) &tgsi_tokens[0])->HeaderSize >= 2); 3894 enum pipe_shader_type shader_type = ((struct tgsi_processor *) &tgsi_tokens[1])->Processor; 3895 3896 int preferred_ir = screen->get_shader_param(screen, shader_type, PIPE_SHADER_CAP_PREFERRED_IR); 3897 bool prefer_nir = (preferred_ir == PIPE_SHADER_IR_NIR); 3898 bool use_nir = prefer_nir || 3899 ((shader_type == PIPE_SHADER_VERTEX) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_VS)) || 3900 ((shader_type == PIPE_SHADER_FRAGMENT) && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NIR_PS)); 3901 3902 /* Allow user to override preferred IR, this is very useful for debugging */ 3903 if (unlikely(shader_type == PIPE_SHADER_VERTEX && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_VS))) 3904 use_nir = false; 3905 if (unlikely(shader_type == PIPE_SHADER_FRAGMENT && nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_NO_NIR_PS))) 3906 use_nir = false; 3907 3908 DUMP("shader type: %s, preferred IR: %s, selected IR: %s\n", 3909 shader_type == PIPE_SHADER_VERTEX ? "VS" : "PS", 3910 prefer_nir ? "NIR" : "TGSI", 3911 use_nir ? "NIR" : "TGSI"); 3912 3913 if (use_nir) { 3914 nine_pipe_nir_shader_state_from_tgsi(&state, tgsi_tokens, screen); 3915 } else { 3916 pipe_shader_state_from_tgsi(&state, tgsi_tokens); 3917 } 3918 3919 assert(state.tokens || state.ir.nir); 3920 3921 if (so) 3922 state.stream_output = *so; 3923 3924 switch (shader_type) { 3925 case PIPE_SHADER_VERTEX: 3926 return pipe->create_vs_state(pipe, &state); 3927 case PIPE_SHADER_FRAGMENT: 3928 return pipe->create_fs_state(pipe, &state); 3929 default: 3930 unreachable("unsupported shader type"); 3931 } 3932} 3933 3934 3935void * 3936nine_create_shader_with_so_and_destroy(struct ureg_program *p, 3937 struct pipe_context *pipe, 3938 const struct pipe_stream_output_info *so) 3939{ 3940 void *result = nine_ureg_create_shader(p, pipe, so); 3941 ureg_destroy(p); 3942 return result; 3943} 3944 3945HRESULT 3946nine_translate_shader(struct NineDevice9 *device, struct nine_shader_info *info, struct pipe_context *pipe) 3947{ 3948 struct shader_translator *tx; 3949 HRESULT hr = D3D_OK; 3950 const unsigned processor = info->type; 3951 struct pipe_screen *screen = info->process_vertices ? device->screen_sw : device->screen; 3952 unsigned *const_ranges = NULL; 3953 3954 user_assert(processor != ~0, D3DERR_INVALIDCALL); 3955 3956 tx = MALLOC_STRUCT(shader_translator); 3957 if (!tx) 3958 return E_OUTOFMEMORY; 3959 3960 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) { 3961 hr = E_OUTOFMEMORY; 3962 goto out; 3963 } 3964 3965 assert(IS_VS || !info->swvp_on); 3966 3967 if (((tx->version.major << 16) | tx->version.minor) > 0x00030000) { 3968 hr = D3DERR_INVALIDCALL; 3969 DBG("Unsupported shader version: %u.%u !\n", 3970 tx->version.major, tx->version.minor); 3971 goto out; 3972 } 3973 if (tx->processor != processor) { 3974 hr = D3DERR_INVALIDCALL; 3975 DBG("Shader type mismatch: %u / %u !\n", tx->processor, processor); 3976 goto out; 3977 } 3978 DUMP("%s%u.%u\n", processor == PIPE_SHADER_VERTEX ? "VS" : "PS", 3979 tx->version.major, tx->version.minor); 3980 3981 parse_shader(tx); 3982 3983 if (tx->failure) { 3984 /* For VS shaders, we print the warning later, 3985 * we first try with swvp. */ 3986 if (IS_PS) 3987 ERR("Encountered buggy shader\n"); 3988 ureg_destroy(tx->ureg); 3989 hr = D3DERR_INVALIDCALL; 3990 goto out; 3991 } 3992 3993 /* Recompile after compacting constant slots if possible */ 3994 if (!tx->indirect_const_access && !info->swvp_on && tx->num_slots > 0) { 3995 unsigned *slot_map; 3996 unsigned c; 3997 int i, j, num_ranges, prev; 3998 3999 DBG("Recompiling shader for constant compaction\n"); 4000 ureg_destroy(tx->ureg); 4001 4002 if (tx->num_inst_labels) 4003 FREE(tx->inst_labels); 4004 FREE(tx->lconstf); 4005 FREE(tx->regs.r); 4006 4007 num_ranges = 0; 4008 prev = -2; 4009 for (i = 0; i < NINE_MAX_CONST_ALL; i++) { 4010 if (tx->slots_used[i]) { 4011 if (prev != i - 1) 4012 num_ranges++; 4013 prev = i; 4014 } 4015 } 4016 slot_map = MALLOC(NINE_MAX_CONST_ALL * sizeof(unsigned)); 4017 const_ranges = CALLOC(num_ranges + 1, 2 * sizeof(unsigned)); /* ranges stop when last is of size 0 */ 4018 if (!slot_map || !const_ranges) { 4019 hr = E_OUTOFMEMORY; 4020 goto out; 4021 } 4022 c = 0; 4023 j = -1; 4024 prev = -2; 4025 for (i = 0; i < NINE_MAX_CONST_ALL; i++) { 4026 if (tx->slots_used[i]) { 4027 if (prev != i - 1) 4028 j++; 4029 /* Initialize first slot of the range */ 4030 if (!const_ranges[2*j+1]) 4031 const_ranges[2*j] = i; 4032 const_ranges[2*j+1]++; 4033 prev = i; 4034 slot_map[i] = c++; 4035 } 4036 } 4037 4038 if (tx_ctor(tx, screen, info) == E_OUTOFMEMORY) { 4039 hr = E_OUTOFMEMORY; 4040 goto out; 4041 } 4042 tx->slot_map = slot_map; 4043 parse_shader(tx); 4044 assert(!tx->failure); 4045#if !defined(NDEBUG) 4046 i = 0; 4047 j = 0; 4048 while (const_ranges[i*2+1] != 0) { 4049 j += const_ranges[i*2+1]; 4050 i++; 4051 } 4052 assert(j == tx->num_slots); 4053#endif 4054 } 4055 4056 /* record local constants */ 4057 if (tx->num_lconstf && tx->indirect_const_access) { 4058 struct nine_range *ranges; 4059 float *data; 4060 int *indices; 4061 unsigned i, k, n; 4062 4063 hr = E_OUTOFMEMORY; 4064 4065 data = MALLOC(tx->num_lconstf * 4 * sizeof(float)); 4066 if (!data) 4067 goto out; 4068 info->lconstf.data = data; 4069 4070 indices = MALLOC(tx->num_lconstf * sizeof(indices[0])); 4071 if (!indices) 4072 goto out; 4073 4074 /* lazy sort, num_lconstf should be small */ 4075 for (n = 0; n < tx->num_lconstf; ++n) { 4076 for (k = 0, i = 0; i < tx->num_lconstf; ++i) { 4077 if (tx->lconstf[i].idx < tx->lconstf[k].idx) 4078 k = i; 4079 } 4080 indices[n] = tx->lconstf[k].idx; 4081 memcpy(&data[n * 4], &tx->lconstf[k].f[0], 4 * sizeof(float)); 4082 tx->lconstf[k].idx = INT_MAX; 4083 } 4084 4085 /* count ranges */ 4086 for (n = 1, i = 1; i < tx->num_lconstf; ++i) 4087 if (indices[i] != indices[i - 1] + 1) 4088 ++n; 4089 ranges = MALLOC(n * sizeof(ranges[0])); 4090 if (!ranges) { 4091 FREE(indices); 4092 goto out; 4093 } 4094 info->lconstf.ranges = ranges; 4095 4096 k = 0; 4097 ranges[k].bgn = indices[0]; 4098 for (i = 1; i < tx->num_lconstf; ++i) { 4099 if (indices[i] != indices[i - 1] + 1) { 4100 ranges[k].next = &ranges[k + 1]; 4101 ranges[k].end = indices[i - 1] + 1; 4102 ++k; 4103 ranges[k].bgn = indices[i]; 4104 } 4105 } 4106 ranges[k].end = indices[i - 1] + 1; 4107 ranges[k].next = NULL; 4108 assert(n == (k + 1)); 4109 4110 FREE(indices); 4111 hr = D3D_OK; 4112 } 4113 4114 /* r500 */ 4115 if (info->const_float_slots > device->max_vs_const_f && 4116 (info->const_int_slots || info->const_bool_slots) && 4117 !info->swvp_on) 4118 ERR("Overlapping constant slots. The shader is likely to be buggy\n"); 4119 4120 4121 if (tx->indirect_const_access) { /* vs only */ 4122 info->const_float_slots = device->max_vs_const_f; 4123 tx->num_slots = MAX2(tx->num_slots, device->max_vs_const_f); 4124 } 4125 4126 if (!info->swvp_on) { 4127 info->const_used_size = sizeof(float[4]) * tx->num_slots; 4128 if (tx->num_slots) 4129 ureg_DECL_constant2D(tx->ureg, 0, tx->num_slots-1, 0); 4130 } else { 4131 ureg_DECL_constant2D(tx->ureg, 0, 4095, 0); 4132 ureg_DECL_constant2D(tx->ureg, 0, 4095, 1); 4133 ureg_DECL_constant2D(tx->ureg, 0, 2047, 2); 4134 ureg_DECL_constant2D(tx->ureg, 0, 511, 3); 4135 } 4136 4137 if (info->process_vertices) 4138 ureg_DECL_constant2D(tx->ureg, 0, 2, 4); /* Viewport data */ 4139 4140 if (unlikely(nine_shader_get_debug_flag(NINE_SHADER_DEBUG_OPTION_DUMP_TGSI))) { 4141 const struct tgsi_token *toks = ureg_get_tokens(tx->ureg, NULL); 4142 tgsi_dump(toks, 0); 4143 ureg_free_tokens(toks); 4144 } 4145 4146 if (info->process_vertices) { 4147 NineVertexDeclaration9_FillStreamOutputInfo(info->vdecl_out, 4148 tx->output_info, 4149 tx->num_outputs, 4150 &(info->so)); 4151 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, &(info->so)); 4152 } else 4153 info->cso = nine_create_shader_with_so_and_destroy(tx->ureg, pipe, NULL); 4154 if (!info->cso) { 4155 hr = D3DERR_DRIVERINTERNALERROR; 4156 FREE(info->lconstf.data); 4157 FREE(info->lconstf.ranges); 4158 goto out; 4159 } 4160 4161 info->const_ranges = const_ranges; 4162 const_ranges = NULL; 4163 info->byte_size = (tx->parse - tx->byte_code) * sizeof(DWORD); 4164out: 4165 if (const_ranges) 4166 FREE(const_ranges); 4167 tx_dtor(tx); 4168 return hr; 4169} 4170