/**********************************************************
 * Copyright 2008-2009 VMware, Inc.  All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
23 * 24 **********************************************************/ 25 26 27#include "pipe/p_shader_tokens.h" 28#include "tgsi/tgsi_dump.h" 29#include "tgsi/tgsi_parse.h" 30#include "util/u_memory.h" 31#include "util/u_math.h" 32#include "util/u_pstipple.h" 33 34#include "svga_tgsi_emit.h" 35#include "svga_context.h" 36 37 38static boolean emit_vs_postamble( struct svga_shader_emitter *emit ); 39static boolean emit_ps_postamble( struct svga_shader_emitter *emit ); 40 41 42static SVGA3dShaderOpCodeType 43translate_opcode(enum tgsi_opcode opcode) 44{ 45 switch (opcode) { 46 case TGSI_OPCODE_ADD: return SVGA3DOP_ADD; 47 case TGSI_OPCODE_DP3: return SVGA3DOP_DP3; 48 case TGSI_OPCODE_DP4: return SVGA3DOP_DP4; 49 case TGSI_OPCODE_FRC: return SVGA3DOP_FRC; 50 case TGSI_OPCODE_MAD: return SVGA3DOP_MAD; 51 case TGSI_OPCODE_MAX: return SVGA3DOP_MAX; 52 case TGSI_OPCODE_MIN: return SVGA3DOP_MIN; 53 case TGSI_OPCODE_MOV: return SVGA3DOP_MOV; 54 case TGSI_OPCODE_MUL: return SVGA3DOP_MUL; 55 case TGSI_OPCODE_NOP: return SVGA3DOP_NOP; 56 default: 57 assert(!"svga: unexpected opcode in translate_opcode()"); 58 return SVGA3DOP_LAST_INST; 59 } 60} 61 62 63static SVGA3dShaderRegType 64translate_file(enum tgsi_file_type file) 65{ 66 switch (file) { 67 case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP; 68 case TGSI_FILE_INPUT: return SVGA3DREG_INPUT; 69 case TGSI_FILE_OUTPUT: return SVGA3DREG_OUTPUT; /* VS3.0+ only */ 70 case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST; 71 case TGSI_FILE_CONSTANT: return SVGA3DREG_CONST; 72 case TGSI_FILE_SAMPLER: return SVGA3DREG_SAMPLER; 73 case TGSI_FILE_ADDRESS: return SVGA3DREG_ADDR; 74 default: 75 assert(!"svga: unexpected register file in translate_file()"); 76 return SVGA3DREG_TEMP; 77 } 78} 79 80 81/** 82 * Translate a TGSI destination register to an SVGA3DShaderDestToken. 83 * \param insn the TGSI instruction 84 * \param idx which TGSI dest register to translate (usually (always?) 
zero) 85 */ 86static SVGA3dShaderDestToken 87translate_dst_register( struct svga_shader_emitter *emit, 88 const struct tgsi_full_instruction *insn, 89 unsigned idx ) 90{ 91 const struct tgsi_full_dst_register *reg = &insn->Dst[idx]; 92 SVGA3dShaderDestToken dest; 93 94 switch (reg->Register.File) { 95 case TGSI_FILE_OUTPUT: 96 /* Output registers encode semantic information in their name. 97 * Need to lookup a table built at decl time: 98 */ 99 dest = emit->output_map[reg->Register.Index]; 100 emit->num_output_writes++; 101 break; 102 103 default: 104 { 105 unsigned index = reg->Register.Index; 106 assert(index < SVGA3D_TEMPREG_MAX); 107 index = MIN2(index, SVGA3D_TEMPREG_MAX - 1); 108 dest = dst_register(translate_file(reg->Register.File), index); 109 } 110 break; 111 } 112 113 if (reg->Register.Indirect) { 114 debug_warning("Indirect indexing of dest registers is not supported!\n"); 115 } 116 117 dest.mask = reg->Register.WriteMask; 118 assert(dest.mask); 119 120 if (insn->Instruction.Saturate) 121 dest.dstMod = SVGA3DDSTMOD_SATURATE; 122 123 return dest; 124} 125 126 127/** 128 * Apply a swizzle to a src_register, returning a new src_register 129 * Ex: swizzle(SRC.ZZYY, SWIZZLE_Z, SWIZZLE_W, SWIZZLE_X, SWIZZLE_Y) 130 * would return SRC.YYZZ 131 */ 132static struct src_register 133swizzle(struct src_register src, 134 unsigned x, unsigned y, unsigned z, unsigned w) 135{ 136 assert(x < 4); 137 assert(y < 4); 138 assert(z < 4); 139 assert(w < 4); 140 x = (src.base.swizzle >> (x * 2)) & 0x3; 141 y = (src.base.swizzle >> (y * 2)) & 0x3; 142 z = (src.base.swizzle >> (z * 2)) & 0x3; 143 w = (src.base.swizzle >> (w * 2)) & 0x3; 144 145 src.base.swizzle = TRANSLATE_SWIZZLE(x, y, z, w); 146 147 return src; 148} 149 150 151/** 152 * Apply a "scalar" swizzle to a src_register returning a new 153 * src_register where all the swizzle terms are the same. 
154 * Ex: scalar(SRC.WZYX, SWIZZLE_Y) would return SRC.ZZZZ 155 */ 156static struct src_register 157scalar(struct src_register src, unsigned comp) 158{ 159 assert(comp < 4); 160 return swizzle( src, comp, comp, comp, comp ); 161} 162 163 164static boolean 165svga_arl_needs_adjustment( const struct svga_shader_emitter *emit ) 166{ 167 unsigned i; 168 169 for (i = 0; i < emit->num_arl_consts; ++i) { 170 if (emit->arl_consts[i].arl_num == emit->current_arl) 171 return TRUE; 172 } 173 return FALSE; 174} 175 176 177static int 178svga_arl_adjustment( const struct svga_shader_emitter *emit ) 179{ 180 unsigned i; 181 182 for (i = 0; i < emit->num_arl_consts; ++i) { 183 if (emit->arl_consts[i].arl_num == emit->current_arl) 184 return emit->arl_consts[i].number; 185 } 186 return 0; 187} 188 189 190/** 191 * Translate a TGSI src register to a src_register. 192 */ 193static struct src_register 194translate_src_register( const struct svga_shader_emitter *emit, 195 const struct tgsi_full_src_register *reg ) 196{ 197 struct src_register src; 198 199 switch (reg->Register.File) { 200 case TGSI_FILE_INPUT: 201 /* Input registers are referred to by their semantic name rather 202 * than by index. Use the mapping build up from the decls: 203 */ 204 src = emit->input_map[reg->Register.Index]; 205 break; 206 207 case TGSI_FILE_IMMEDIATE: 208 /* Immediates are appended after TGSI constants in the D3D 209 * constant buffer. 210 */ 211 src = src_register( translate_file( reg->Register.File ), 212 reg->Register.Index + emit->imm_start ); 213 break; 214 215 default: 216 src = src_register( translate_file( reg->Register.File ), 217 reg->Register.Index ); 218 break; 219 } 220 221 /* Indirect addressing. 222 */ 223 if (reg->Register.Indirect) { 224 if (emit->unit == PIPE_SHADER_FRAGMENT) { 225 /* Pixel shaders have only loop registers for relative 226 * addressing into inputs. Ignore the redundant address 227 * register, the contents of aL should be in sync with it. 
228 */ 229 if (reg->Register.File == TGSI_FILE_INPUT) { 230 src.base.relAddr = 1; 231 src.indirect = src_token(SVGA3DREG_LOOP, 0); 232 } 233 } 234 else { 235 /* Constant buffers only. 236 */ 237 if (reg->Register.File == TGSI_FILE_CONSTANT) { 238 /* we shift the offset towards the minimum */ 239 if (svga_arl_needs_adjustment( emit )) { 240 src.base.num -= svga_arl_adjustment( emit ); 241 } 242 src.base.relAddr = 1; 243 244 /* Not really sure what should go in the second token: 245 */ 246 src.indirect = src_token( SVGA3DREG_ADDR, 247 reg->Indirect.Index ); 248 249 src.indirect.swizzle = SWIZZLE_XXXX; 250 } 251 } 252 } 253 254 src = swizzle( src, 255 reg->Register.SwizzleX, 256 reg->Register.SwizzleY, 257 reg->Register.SwizzleZ, 258 reg->Register.SwizzleW ); 259 260 /* src.mod isn't a bitfield, unfortunately */ 261 if (reg->Register.Absolute) { 262 if (reg->Register.Negate) 263 src.base.srcMod = SVGA3DSRCMOD_ABSNEG; 264 else 265 src.base.srcMod = SVGA3DSRCMOD_ABS; 266 } 267 else { 268 if (reg->Register.Negate) 269 src.base.srcMod = SVGA3DSRCMOD_NEG; 270 else 271 src.base.srcMod = SVGA3DSRCMOD_NONE; 272 } 273 274 return src; 275} 276 277 278/* 279 * Get a temporary register. 280 * Note: if we exceed the temporary register limit we just use 281 * register SVGA3D_TEMPREG_MAX - 1. 282 */ 283static SVGA3dShaderDestToken 284get_temp( struct svga_shader_emitter *emit ) 285{ 286 int i = emit->nr_hw_temp + emit->internal_temp_count++; 287 if (i >= SVGA3D_TEMPREG_MAX) { 288 debug_warn_once("svga: Too many temporary registers used in shader\n"); 289 i = SVGA3D_TEMPREG_MAX - 1; 290 } 291 return dst_register( SVGA3DREG_TEMP, i ); 292} 293 294 295/** 296 * Release a single temp. Currently only effective if it was the last 297 * allocated temp, otherwise release will be delayed until the next 298 * call to reset_temp_regs(). 
299 */ 300static void 301release_temp( struct svga_shader_emitter *emit, 302 SVGA3dShaderDestToken temp ) 303{ 304 if (temp.num == emit->internal_temp_count - 1) 305 emit->internal_temp_count--; 306} 307 308 309/** 310 * Release all temps. 311 */ 312static void 313reset_temp_regs(struct svga_shader_emitter *emit) 314{ 315 emit->internal_temp_count = 0; 316} 317 318 319/** Emit bytecode for a src_register */ 320static boolean 321emit_src(struct svga_shader_emitter *emit, const struct src_register src) 322{ 323 if (src.base.relAddr) { 324 assert(src.base.reserved0); 325 assert(src.indirect.reserved0); 326 return (svga_shader_emit_dword( emit, src.base.value ) && 327 svga_shader_emit_dword( emit, src.indirect.value )); 328 } 329 else { 330 assert(src.base.reserved0); 331 return svga_shader_emit_dword( emit, src.base.value ); 332 } 333} 334 335 336/** Emit bytecode for a dst_register */ 337static boolean 338emit_dst(struct svga_shader_emitter *emit, SVGA3dShaderDestToken dest) 339{ 340 assert(dest.reserved0); 341 assert(dest.mask); 342 return svga_shader_emit_dword( emit, dest.value ); 343} 344 345 346/** Emit bytecode for a 1-operand instruction */ 347static boolean 348emit_op1(struct svga_shader_emitter *emit, 349 SVGA3dShaderInstToken inst, 350 SVGA3dShaderDestToken dest, 351 struct src_register src0) 352{ 353 return (emit_instruction(emit, inst) && 354 emit_dst(emit, dest) && 355 emit_src(emit, src0)); 356} 357 358 359/** Emit bytecode for a 2-operand instruction */ 360static boolean 361emit_op2(struct svga_shader_emitter *emit, 362 SVGA3dShaderInstToken inst, 363 SVGA3dShaderDestToken dest, 364 struct src_register src0, 365 struct src_register src1) 366{ 367 return (emit_instruction(emit, inst) && 368 emit_dst(emit, dest) && 369 emit_src(emit, src0) && 370 emit_src(emit, src1)); 371} 372 373 374/** Emit bytecode for a 3-operand instruction */ 375static boolean 376emit_op3(struct svga_shader_emitter *emit, 377 SVGA3dShaderInstToken inst, 378 SVGA3dShaderDestToken 
dest, 379 struct src_register src0, 380 struct src_register src1, 381 struct src_register src2) 382{ 383 return (emit_instruction(emit, inst) && 384 emit_dst(emit, dest) && 385 emit_src(emit, src0) && 386 emit_src(emit, src1) && 387 emit_src(emit, src2)); 388} 389 390 391/** Emit bytecode for a 4-operand instruction */ 392static boolean 393emit_op4(struct svga_shader_emitter *emit, 394 SVGA3dShaderInstToken inst, 395 SVGA3dShaderDestToken dest, 396 struct src_register src0, 397 struct src_register src1, 398 struct src_register src2, 399 struct src_register src3) 400{ 401 return (emit_instruction(emit, inst) && 402 emit_dst(emit, dest) && 403 emit_src(emit, src0) && 404 emit_src(emit, src1) && 405 emit_src(emit, src2) && 406 emit_src(emit, src3)); 407} 408 409 410/** 411 * Apply the absolute value modifier to the given src_register, returning 412 * a new src_register. 413 */ 414static struct src_register 415absolute(struct src_register src) 416{ 417 src.base.srcMod = SVGA3DSRCMOD_ABS; 418 return src; 419} 420 421 422/** 423 * Apply the negation modifier to the given src_register, returning 424 * a new src_register. 425 */ 426static struct src_register 427negate(struct src_register src) 428{ 429 switch (src.base.srcMod) { 430 case SVGA3DSRCMOD_ABS: 431 src.base.srcMod = SVGA3DSRCMOD_ABSNEG; 432 break; 433 case SVGA3DSRCMOD_ABSNEG: 434 src.base.srcMod = SVGA3DSRCMOD_ABS; 435 break; 436 case SVGA3DSRCMOD_NEG: 437 src.base.srcMod = SVGA3DSRCMOD_NONE; 438 break; 439 case SVGA3DSRCMOD_NONE: 440 src.base.srcMod = SVGA3DSRCMOD_NEG; 441 break; 442 } 443 return src; 444} 445 446 447 448/* Replace the src with the temporary specified in the dst, but copying 449 * only the necessary channels, and preserving the original swizzle (which is 450 * important given that several opcodes have constraints in the allowed 451 * swizzles). 
452 */ 453static boolean 454emit_repl(struct svga_shader_emitter *emit, 455 SVGA3dShaderDestToken dst, 456 struct src_register *src0) 457{ 458 unsigned src0_swizzle; 459 unsigned chan; 460 461 assert(SVGA3dShaderGetRegType(dst.value) == SVGA3DREG_TEMP); 462 463 src0_swizzle = src0->base.swizzle; 464 465 dst.mask = 0; 466 for (chan = 0; chan < 4; ++chan) { 467 unsigned swizzle = (src0_swizzle >> (chan *2)) & 0x3; 468 dst.mask |= 1 << swizzle; 469 } 470 assert(dst.mask); 471 472 src0->base.swizzle = SVGA3DSWIZZLE_NONE; 473 474 if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, *src0 )) 475 return FALSE; 476 477 *src0 = src( dst ); 478 src0->base.swizzle = src0_swizzle; 479 480 return TRUE; 481} 482 483 484/** 485 * Submit/emit an instruction with zero operands. 486 */ 487static boolean 488submit_op0(struct svga_shader_emitter *emit, 489 SVGA3dShaderInstToken inst, 490 SVGA3dShaderDestToken dest) 491{ 492 return (emit_instruction( emit, inst ) && 493 emit_dst( emit, dest )); 494} 495 496 497/** 498 * Submit/emit an instruction with one operand. 499 */ 500static boolean 501submit_op1(struct svga_shader_emitter *emit, 502 SVGA3dShaderInstToken inst, 503 SVGA3dShaderDestToken dest, 504 struct src_register src0) 505{ 506 return emit_op1( emit, inst, dest, src0 ); 507} 508 509 510/** 511 * Submit/emit an instruction with two operands. 512 * 513 * SVGA shaders may not refer to >1 constant register in a single 514 * instruction. This function checks for that usage and inserts a 515 * move to temporary if detected. 516 * 517 * The same applies to input registers -- at most a single input 518 * register may be read by any instruction. 
519 */ 520static boolean 521submit_op2(struct svga_shader_emitter *emit, 522 SVGA3dShaderInstToken inst, 523 SVGA3dShaderDestToken dest, 524 struct src_register src0, 525 struct src_register src1) 526{ 527 SVGA3dShaderDestToken temp; 528 SVGA3dShaderRegType type0, type1; 529 boolean need_temp = FALSE; 530 531 temp.value = 0; 532 type0 = SVGA3dShaderGetRegType( src0.base.value ); 533 type1 = SVGA3dShaderGetRegType( src1.base.value ); 534 535 if (type0 == SVGA3DREG_CONST && 536 type1 == SVGA3DREG_CONST && 537 src0.base.num != src1.base.num) 538 need_temp = TRUE; 539 540 if (type0 == SVGA3DREG_INPUT && 541 type1 == SVGA3DREG_INPUT && 542 src0.base.num != src1.base.num) 543 need_temp = TRUE; 544 545 if (need_temp) { 546 temp = get_temp( emit ); 547 548 if (!emit_repl( emit, temp, &src0 )) 549 return FALSE; 550 } 551 552 if (!emit_op2( emit, inst, dest, src0, src1 )) 553 return FALSE; 554 555 if (need_temp) 556 release_temp( emit, temp ); 557 558 return TRUE; 559} 560 561 562/** 563 * Submit/emit an instruction with three operands. 564 * 565 * SVGA shaders may not refer to >1 constant register in a single 566 * instruction. This function checks for that usage and inserts a 567 * move to temporary if detected. 
568 */ 569static boolean 570submit_op3(struct svga_shader_emitter *emit, 571 SVGA3dShaderInstToken inst, 572 SVGA3dShaderDestToken dest, 573 struct src_register src0, 574 struct src_register src1, 575 struct src_register src2) 576{ 577 SVGA3dShaderDestToken temp0; 578 SVGA3dShaderDestToken temp1; 579 boolean need_temp0 = FALSE; 580 boolean need_temp1 = FALSE; 581 SVGA3dShaderRegType type0, type1, type2; 582 583 temp0.value = 0; 584 temp1.value = 0; 585 type0 = SVGA3dShaderGetRegType( src0.base.value ); 586 type1 = SVGA3dShaderGetRegType( src1.base.value ); 587 type2 = SVGA3dShaderGetRegType( src2.base.value ); 588 589 if (inst.op != SVGA3DOP_SINCOS) { 590 if (type0 == SVGA3DREG_CONST && 591 ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) || 592 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) 593 need_temp0 = TRUE; 594 595 if (type1 == SVGA3DREG_CONST && 596 (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num)) 597 need_temp1 = TRUE; 598 } 599 600 if (type0 == SVGA3DREG_INPUT && 601 ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) || 602 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) 603 need_temp0 = TRUE; 604 605 if (type1 == SVGA3DREG_INPUT && 606 (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num)) 607 need_temp1 = TRUE; 608 609 if (need_temp0) { 610 temp0 = get_temp( emit ); 611 612 if (!emit_repl( emit, temp0, &src0 )) 613 return FALSE; 614 } 615 616 if (need_temp1) { 617 temp1 = get_temp( emit ); 618 619 if (!emit_repl( emit, temp1, &src1 )) 620 return FALSE; 621 } 622 623 if (!emit_op3( emit, inst, dest, src0, src1, src2 )) 624 return FALSE; 625 626 if (need_temp1) 627 release_temp( emit, temp1 ); 628 if (need_temp0) 629 release_temp( emit, temp0 ); 630 return TRUE; 631} 632 633 634/** 635 * Submit/emit an instruction with four operands. 636 * 637 * SVGA shaders may not refer to >1 constant register in a single 638 * instruction. 
This function checks for that usage and inserts a 639 * move to temporary if detected. 640 */ 641static boolean 642submit_op4(struct svga_shader_emitter *emit, 643 SVGA3dShaderInstToken inst, 644 SVGA3dShaderDestToken dest, 645 struct src_register src0, 646 struct src_register src1, 647 struct src_register src2, 648 struct src_register src3) 649{ 650 SVGA3dShaderDestToken temp0; 651 SVGA3dShaderDestToken temp3; 652 boolean need_temp0 = FALSE; 653 boolean need_temp3 = FALSE; 654 SVGA3dShaderRegType type0, type1, type2, type3; 655 656 temp0.value = 0; 657 temp3.value = 0; 658 type0 = SVGA3dShaderGetRegType( src0.base.value ); 659 type1 = SVGA3dShaderGetRegType( src1.base.value ); 660 type2 = SVGA3dShaderGetRegType( src2.base.value ); 661 type3 = SVGA3dShaderGetRegType( src2.base.value ); 662 663 /* Make life a little easier - this is only used by the TXD 664 * instruction which is guaranteed not to have a constant/input reg 665 * in one slot at least: 666 */ 667 assert(type1 == SVGA3DREG_SAMPLER); 668 (void) type1; 669 670 if (type0 == SVGA3DREG_CONST && 671 ((type3 == SVGA3DREG_CONST && src0.base.num != src3.base.num) || 672 (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num))) 673 need_temp0 = TRUE; 674 675 if (type3 == SVGA3DREG_CONST && 676 (type2 == SVGA3DREG_CONST && src3.base.num != src2.base.num)) 677 need_temp3 = TRUE; 678 679 if (type0 == SVGA3DREG_INPUT && 680 ((type3 == SVGA3DREG_INPUT && src0.base.num != src3.base.num) || 681 (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num))) 682 need_temp0 = TRUE; 683 684 if (type3 == SVGA3DREG_INPUT && 685 (type2 == SVGA3DREG_INPUT && src3.base.num != src2.base.num)) 686 need_temp3 = TRUE; 687 688 if (need_temp0) { 689 temp0 = get_temp( emit ); 690 691 if (!emit_repl( emit, temp0, &src0 )) 692 return FALSE; 693 } 694 695 if (need_temp3) { 696 temp3 = get_temp( emit ); 697 698 if (!emit_repl( emit, temp3, &src3 )) 699 return FALSE; 700 } 701 702 if (!emit_op4( emit, inst, dest, src0, src1, src2, 
src3 )) 703 return FALSE; 704 705 if (need_temp3) 706 release_temp( emit, temp3 ); 707 if (need_temp0) 708 release_temp( emit, temp0 ); 709 return TRUE; 710} 711 712 713/** 714 * Do the src and dest registers refer to the same register? 715 */ 716static boolean 717alias_src_dst(struct src_register src, 718 SVGA3dShaderDestToken dst) 719{ 720 if (src.base.num != dst.num) 721 return FALSE; 722 723 if (SVGA3dShaderGetRegType(dst.value) != 724 SVGA3dShaderGetRegType(src.base.value)) 725 return FALSE; 726 727 return TRUE; 728} 729 730 731/** 732 * Helper for emitting SVGA immediate values using the SVGA3DOP_DEF[I] 733 * instructions. 734 */ 735static boolean 736emit_def_const(struct svga_shader_emitter *emit, 737 SVGA3dShaderConstType type, 738 unsigned idx, float a, float b, float c, float d) 739{ 740 SVGA3DOpDefArgs def; 741 SVGA3dShaderInstToken opcode; 742 743 switch (type) { 744 case SVGA3D_CONST_TYPE_FLOAT: 745 opcode = inst_token( SVGA3DOP_DEF ); 746 def.dst = dst_register( SVGA3DREG_CONST, idx ); 747 def.constValues[0] = a; 748 def.constValues[1] = b; 749 def.constValues[2] = c; 750 def.constValues[3] = d; 751 break; 752 case SVGA3D_CONST_TYPE_INT: 753 opcode = inst_token( SVGA3DOP_DEFI ); 754 def.dst = dst_register( SVGA3DREG_CONSTINT, idx ); 755 def.constIValues[0] = (int)a; 756 def.constIValues[1] = (int)b; 757 def.constIValues[2] = (int)c; 758 def.constIValues[3] = (int)d; 759 break; 760 default: 761 assert(0); 762 opcode = inst_token( SVGA3DOP_NOP ); 763 break; 764 } 765 766 if (!emit_instruction(emit, opcode) || 767 !svga_shader_emit_dwords( emit, def.values, ARRAY_SIZE(def.values))) 768 return FALSE; 769 770 return TRUE; 771} 772 773 774static boolean 775create_loop_const( struct svga_shader_emitter *emit ) 776{ 777 unsigned idx = emit->nr_hw_int_const++; 778 779 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx, 780 255, /* iteration count */ 781 0, /* initial value */ 782 1, /* step size */ 783 0 /* not used, must be 0 */)) 784 return FALSE; 785 786 
emit->loop_const_idx = idx; 787 emit->created_loop_const = TRUE; 788 789 return TRUE; 790} 791 792static boolean 793create_arl_consts( struct svga_shader_emitter *emit ) 794{ 795 int i; 796 797 for (i = 0; i < emit->num_arl_consts; i += 4) { 798 int j; 799 unsigned idx = emit->nr_hw_float_const++; 800 float vals[4]; 801 for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) { 802 vals[j] = (float) emit->arl_consts[i + j].number; 803 emit->arl_consts[i + j].idx = idx; 804 switch (j) { 805 case 0: 806 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X; 807 break; 808 case 1: 809 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y; 810 break; 811 case 2: 812 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z; 813 break; 814 case 3: 815 emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W; 816 break; 817 } 818 } 819 while (j < 4) 820 vals[j++] = 0; 821 822 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx, 823 vals[0], vals[1], 824 vals[2], vals[3])) 825 return FALSE; 826 } 827 828 return TRUE; 829} 830 831 832/** 833 * Return the register which holds the pixel shaders front/back- 834 * facing value. 835 */ 836static struct src_register 837get_vface( struct svga_shader_emitter *emit ) 838{ 839 assert(emit->emitted_vface); 840 return src_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE); 841} 842 843 844/** 845 * Create/emit a "common" constant with values {0, 0.5, -1, 1}. 846 * We can swizzle this to produce other useful constants such as 847 * {0, 0, 0, 0}, {1, 1, 1, 1}, etc. 848 */ 849static boolean 850create_common_immediate( struct svga_shader_emitter *emit ) 851{ 852 unsigned idx = emit->nr_hw_float_const++; 853 854 /* Emit the constant (0, 0.5, -1, 1) and use swizzling to generate 855 * other useful vectors. 
856 */ 857 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 858 idx, 0.0f, 0.5f, -1.0f, 1.0f )) 859 return FALSE; 860 emit->common_immediate_idx[0] = idx; 861 idx++; 862 863 /* Emit constant {2, 0, 0, 0} (only the 2 is used for now) */ 864 if (emit->key.vs.adjust_attrib_range) { 865 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 866 idx, 2.0f, 0.0f, 0.0f, 0.0f )) 867 return FALSE; 868 emit->common_immediate_idx[1] = idx; 869 } 870 else { 871 emit->common_immediate_idx[1] = -1; 872 } 873 874 emit->created_common_immediate = TRUE; 875 876 return TRUE; 877} 878 879 880/** 881 * Return swizzle/position for the given value in the "common" immediate. 882 */ 883static inline unsigned 884common_immediate_swizzle(float value) 885{ 886 if (value == 0.0f) 887 return TGSI_SWIZZLE_X; 888 else if (value == 0.5f) 889 return TGSI_SWIZZLE_Y; 890 else if (value == -1.0f) 891 return TGSI_SWIZZLE_Z; 892 else if (value == 1.0f) 893 return TGSI_SWIZZLE_W; 894 else { 895 assert(!"illegal value in common_immediate_swizzle"); 896 return TGSI_SWIZZLE_X; 897 } 898} 899 900 901/** 902 * Returns an immediate reg where all the terms are either 0, 1, 2 or 0.5 903 */ 904static struct src_register 905get_immediate(struct svga_shader_emitter *emit, 906 float x, float y, float z, float w) 907{ 908 unsigned sx = common_immediate_swizzle(x); 909 unsigned sy = common_immediate_swizzle(y); 910 unsigned sz = common_immediate_swizzle(z); 911 unsigned sw = common_immediate_swizzle(w); 912 assert(emit->created_common_immediate); 913 assert(emit->common_immediate_idx[0] >= 0); 914 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 915 sx, sy, sz, sw); 916} 917 918 919/** 920 * returns {0, 0, 0, 0} immediate 921 */ 922static struct src_register 923get_zero_immediate( struct svga_shader_emitter *emit ) 924{ 925 assert(emit->created_common_immediate); 926 assert(emit->common_immediate_idx[0] >= 0); 927 return swizzle(src_register( SVGA3DREG_CONST, 928 
emit->common_immediate_idx[0]), 929 0, 0, 0, 0); 930} 931 932 933/** 934 * returns {1, 1, 1, 1} immediate 935 */ 936static struct src_register 937get_one_immediate( struct svga_shader_emitter *emit ) 938{ 939 assert(emit->created_common_immediate); 940 assert(emit->common_immediate_idx[0] >= 0); 941 return swizzle(src_register( SVGA3DREG_CONST, 942 emit->common_immediate_idx[0]), 943 3, 3, 3, 3); 944} 945 946 947/** 948 * returns {0.5, 0.5, 0.5, 0.5} immediate 949 */ 950static struct src_register 951get_half_immediate( struct svga_shader_emitter *emit ) 952{ 953 assert(emit->created_common_immediate); 954 assert(emit->common_immediate_idx[0] >= 0); 955 return swizzle(src_register(SVGA3DREG_CONST, emit->common_immediate_idx[0]), 956 1, 1, 1, 1); 957} 958 959 960/** 961 * returns {2, 2, 2, 2} immediate 962 */ 963static struct src_register 964get_two_immediate( struct svga_shader_emitter *emit ) 965{ 966 /* Note we use the second common immediate here */ 967 assert(emit->created_common_immediate); 968 assert(emit->common_immediate_idx[1] >= 0); 969 return swizzle(src_register( SVGA3DREG_CONST, 970 emit->common_immediate_idx[1]), 971 0, 0, 0, 0); 972} 973 974 975/** 976 * returns the loop const 977 */ 978static struct src_register 979get_loop_const( struct svga_shader_emitter *emit ) 980{ 981 assert(emit->created_loop_const); 982 assert(emit->loop_const_idx >= 0); 983 return src_register( SVGA3DREG_CONSTINT, 984 emit->loop_const_idx ); 985} 986 987 988static struct src_register 989get_fake_arl_const( struct svga_shader_emitter *emit ) 990{ 991 struct src_register reg; 992 int idx = 0, swizzle = 0, i; 993 994 for (i = 0; i < emit->num_arl_consts; ++ i) { 995 if (emit->arl_consts[i].arl_num == emit->current_arl) { 996 idx = emit->arl_consts[i].idx; 997 swizzle = emit->arl_consts[i].swizzle; 998 } 999 } 1000 1001 reg = src_register( SVGA3DREG_CONST, idx ); 1002 return scalar(reg, swizzle); 1003} 1004 1005 1006/** 1007 * Return a register which holds the width and height 
of the texture 1008 * currently bound to the given sampler. 1009 */ 1010static struct src_register 1011get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num ) 1012{ 1013 int idx; 1014 struct src_register reg; 1015 1016 /* the width/height indexes start right after constants */ 1017 idx = emit->key.tex[sampler_num].width_height_idx + 1018 emit->info.file_max[TGSI_FILE_CONSTANT] + 1; 1019 1020 reg = src_register( SVGA3DREG_CONST, idx ); 1021 return reg; 1022} 1023 1024 1025static boolean 1026emit_fake_arl(struct svga_shader_emitter *emit, 1027 const struct tgsi_full_instruction *insn) 1028{ 1029 const struct src_register src0 = 1030 translate_src_register(emit, &insn->Src[0] ); 1031 struct src_register src1 = get_fake_arl_const( emit ); 1032 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1033 SVGA3dShaderDestToken tmp = get_temp( emit ); 1034 1035 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) 1036 return FALSE; 1037 1038 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ), 1039 src1)) 1040 return FALSE; 1041 1042 /* replicate the original swizzle */ 1043 src1 = src(tmp); 1044 src1.base.swizzle = src0.base.swizzle; 1045 1046 return submit_op1( emit, inst_token( SVGA3DOP_MOVA ), 1047 dst, src1 ); 1048} 1049 1050 1051static boolean 1052emit_if(struct svga_shader_emitter *emit, 1053 const struct tgsi_full_instruction *insn) 1054{ 1055 struct src_register src0 = 1056 translate_src_register(emit, &insn->Src[0]); 1057 struct src_register zero = get_zero_immediate(emit); 1058 SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC ); 1059 1060 if_token.control = SVGA3DOPCOMPC_NE; 1061 1062 if (SVGA3dShaderGetRegType(src0.base.value) == SVGA3DREG_CONST) { 1063 /* 1064 * Max different constant registers readable per IFC instruction is 1. 
1065 */ 1066 SVGA3dShaderDestToken tmp = get_temp( emit ); 1067 1068 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0)) 1069 return FALSE; 1070 1071 src0 = scalar(src( tmp ), TGSI_SWIZZLE_X); 1072 } 1073 1074 emit->dynamic_branching_level++; 1075 1076 return (emit_instruction( emit, if_token ) && 1077 emit_src( emit, src0 ) && 1078 emit_src( emit, zero ) ); 1079} 1080 1081 1082static boolean 1083emit_else(struct svga_shader_emitter *emit, 1084 const struct tgsi_full_instruction *insn) 1085{ 1086 return emit_instruction(emit, inst_token(SVGA3DOP_ELSE)); 1087} 1088 1089 1090static boolean 1091emit_endif(struct svga_shader_emitter *emit, 1092 const struct tgsi_full_instruction *insn) 1093{ 1094 emit->dynamic_branching_level--; 1095 1096 return emit_instruction(emit, inst_token(SVGA3DOP_ENDIF)); 1097} 1098 1099 1100/** 1101 * Translate the following TGSI FLR instruction. 1102 * FLR DST, SRC 1103 * To the following SVGA3D instruction sequence. 1104 * FRC TMP, SRC 1105 * SUB DST, SRC, TMP 1106 */ 1107static boolean 1108emit_floor(struct svga_shader_emitter *emit, 1109 const struct tgsi_full_instruction *insn ) 1110{ 1111 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1112 const struct src_register src0 = 1113 translate_src_register(emit, &insn->Src[0] ); 1114 SVGA3dShaderDestToken temp = get_temp( emit ); 1115 1116 /* FRC TMP, SRC */ 1117 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 )) 1118 return FALSE; 1119 1120 /* SUB DST, SRC, TMP */ 1121 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0, 1122 negate( src( temp ) ) )) 1123 return FALSE; 1124 1125 return TRUE; 1126} 1127 1128 1129/** 1130 * Translate the following TGSI CEIL instruction. 1131 * CEIL DST, SRC 1132 * To the following SVGA3D instruction sequence. 
1133 * FRC TMP, -SRC 1134 * ADD DST, SRC, TMP 1135 */ 1136static boolean 1137emit_ceil(struct svga_shader_emitter *emit, 1138 const struct tgsi_full_instruction *insn) 1139{ 1140 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); 1141 const struct src_register src0 = 1142 translate_src_register(emit, &insn->Src[0]); 1143 SVGA3dShaderDestToken temp = get_temp(emit); 1144 1145 /* FRC TMP, -SRC */ 1146 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), temp, negate(src0))) 1147 return FALSE; 1148 1149 /* ADD DST, SRC, TMP */ 1150 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), dst, src0, src(temp))) 1151 return FALSE; 1152 1153 return TRUE; 1154} 1155 1156 1157/** 1158 * Translate the following TGSI DIV instruction. 1159 * DIV DST.xy, SRC0, SRC1 1160 * To the following SVGA3D instruction sequence. 1161 * RCP TMP.x, SRC1.xxxx 1162 * RCP TMP.y, SRC1.yyyy 1163 * MUL DST.xy, SRC0, TMP 1164 */ 1165static boolean 1166emit_div(struct svga_shader_emitter *emit, 1167 const struct tgsi_full_instruction *insn ) 1168{ 1169 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1170 const struct src_register src0 = 1171 translate_src_register(emit, &insn->Src[0] ); 1172 const struct src_register src1 = 1173 translate_src_register(emit, &insn->Src[1] ); 1174 SVGA3dShaderDestToken temp = get_temp( emit ); 1175 unsigned i; 1176 1177 /* For each enabled element, perform a RCP instruction. Note that 1178 * RCP is scalar in SVGA3D: 1179 */ 1180 for (i = 0; i < 4; i++) { 1181 unsigned channel = 1 << i; 1182 if (dst.mask & channel) { 1183 /* RCP TMP.?, SRC1.???? 
*/ 1184 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), 1185 writemask(temp, channel), 1186 scalar(src1, i) )) 1187 return FALSE; 1188 } 1189 } 1190 1191 /* Vector mul: 1192 * MUL DST, SRC0, TMP 1193 */ 1194 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0, 1195 src( temp ) )) 1196 return FALSE; 1197 1198 return TRUE; 1199} 1200 1201 1202/** 1203 * Translate the following TGSI DP2 instruction. 1204 * DP2 DST, SRC1, SRC2 1205 * To the following SVGA3D instruction sequence. 1206 * MUL TMP, SRC1, SRC2 1207 * ADD DST, TMP.xxxx, TMP.yyyy 1208 */ 1209static boolean 1210emit_dp2(struct svga_shader_emitter *emit, 1211 const struct tgsi_full_instruction *insn ) 1212{ 1213 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1214 const struct src_register src0 = 1215 translate_src_register(emit, &insn->Src[0]); 1216 const struct src_register src1 = 1217 translate_src_register(emit, &insn->Src[1]); 1218 SVGA3dShaderDestToken temp = get_temp( emit ); 1219 struct src_register temp_src0, temp_src1; 1220 1221 /* MUL TMP, SRC1, SRC2 */ 1222 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 )) 1223 return FALSE; 1224 1225 temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X); 1226 temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y); 1227 1228 /* ADD DST, TMP.xxxx, TMP.yyyy */ 1229 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, 1230 temp_src0, temp_src1 )) 1231 return FALSE; 1232 1233 return TRUE; 1234} 1235 1236 1237/** 1238 * Sine / Cosine helper function. 
1239 */ 1240static boolean 1241do_emit_sincos(struct svga_shader_emitter *emit, 1242 SVGA3dShaderDestToken dst, 1243 struct src_register src0) 1244{ 1245 src0 = scalar(src0, TGSI_SWIZZLE_X); 1246 return submit_op1(emit, inst_token(SVGA3DOP_SINCOS), dst, src0); 1247} 1248 1249 1250/** 1251 * Translate TGSI SIN instruction into: 1252 * SCS TMP SRC 1253 * MOV DST TMP.yyyy 1254 */ 1255static boolean 1256emit_sin(struct svga_shader_emitter *emit, 1257 const struct tgsi_full_instruction *insn ) 1258{ 1259 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1260 struct src_register src0 = 1261 translate_src_register(emit, &insn->Src[0] ); 1262 SVGA3dShaderDestToken temp = get_temp( emit ); 1263 1264 /* SCS TMP SRC */ 1265 if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0)) 1266 return FALSE; 1267 1268 src0 = scalar(src( temp ), TGSI_SWIZZLE_Y); 1269 1270 /* MOV DST TMP.yyyy */ 1271 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) 1272 return FALSE; 1273 1274 return TRUE; 1275} 1276 1277 1278/* 1279 * Translate TGSI COS instruction into: 1280 * SCS TMP SRC 1281 * MOV DST TMP.xxxx 1282 */ 1283static boolean 1284emit_cos(struct svga_shader_emitter *emit, 1285 const struct tgsi_full_instruction *insn) 1286{ 1287 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1288 struct src_register src0 = 1289 translate_src_register(emit, &insn->Src[0] ); 1290 SVGA3dShaderDestToken temp = get_temp( emit ); 1291 1292 /* SCS TMP SRC */ 1293 if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 )) 1294 return FALSE; 1295 1296 src0 = scalar(src( temp ), TGSI_SWIZZLE_X); 1297 1298 /* MOV DST TMP.xxxx */ 1299 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 )) 1300 return FALSE; 1301 1302 return TRUE; 1303} 1304 1305 1306/** 1307 * Translate/emit TGSI SSG (Set Sign: -1, 0, +1) instruction. 
1308 */ 1309static boolean 1310emit_ssg(struct svga_shader_emitter *emit, 1311 const struct tgsi_full_instruction *insn) 1312{ 1313 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1314 struct src_register src0 = 1315 translate_src_register(emit, &insn->Src[0] ); 1316 SVGA3dShaderDestToken temp0 = get_temp( emit ); 1317 SVGA3dShaderDestToken temp1 = get_temp( emit ); 1318 struct src_register zero, one; 1319 1320 if (emit->unit == PIPE_SHADER_VERTEX) { 1321 /* SGN DST, SRC0, TMP0, TMP1 */ 1322 return submit_op3( emit, inst_token( SVGA3DOP_SGN ), dst, src0, 1323 src( temp0 ), src( temp1 ) ); 1324 } 1325 1326 one = get_one_immediate(emit); 1327 zero = get_zero_immediate(emit); 1328 1329 /* CMP TMP0, SRC0, one, zero */ 1330 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), 1331 writemask( temp0, dst.mask ), src0, one, zero )) 1332 return FALSE; 1333 1334 /* CMP TMP1, negate(SRC0), negate(one), zero */ 1335 if (!submit_op3( emit, inst_token( SVGA3DOP_CMP ), 1336 writemask( temp1, dst.mask ), negate( src0 ), negate( one ), 1337 zero )) 1338 return FALSE; 1339 1340 /* ADD DST, TMP0, TMP1 */ 1341 return submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src( temp0 ), 1342 src( temp1 ) ); 1343} 1344 1345 1346/** 1347 * Translate/emit the conditional discard instruction (discard if 1348 * any of X,Y,Z,W are negative). 
1349 */ 1350static boolean 1351emit_cond_discard(struct svga_shader_emitter *emit, 1352 const struct tgsi_full_instruction *insn) 1353{ 1354 const struct tgsi_full_src_register *reg = &insn->Src[0]; 1355 struct src_register src0, srcIn; 1356 const boolean special = (reg->Register.Absolute || 1357 reg->Register.Negate || 1358 reg->Register.Indirect || 1359 reg->Register.SwizzleX != 0 || 1360 reg->Register.SwizzleY != 1 || 1361 reg->Register.SwizzleZ != 2 || 1362 reg->Register.File != TGSI_FILE_TEMPORARY); 1363 SVGA3dShaderDestToken temp; 1364 1365 src0 = srcIn = translate_src_register( emit, reg ); 1366 1367 if (special) { 1368 /* need a temp reg */ 1369 temp = get_temp( emit ); 1370 } 1371 1372 if (special) { 1373 /* move the source into a temp register */ 1374 submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, src0); 1375 1376 src0 = src( temp ); 1377 } 1378 1379 /* Do the discard by checking if any of the XYZW components are < 0. 1380 * Note that ps_2_0 and later take XYZW in consideration, while ps_1_x 1381 * only used XYZ. The MSDN documentation about this is incorrect. 1382 */ 1383 if (!submit_op0( emit, inst_token( SVGA3DOP_TEXKILL ), dst(src0) )) 1384 return FALSE; 1385 1386 return TRUE; 1387} 1388 1389 1390/** 1391 * Translate/emit the unconditional discard instruction (usually found inside 1392 * an IF/ELSE/ENDIF block). 
1393 */ 1394static boolean 1395emit_discard(struct svga_shader_emitter *emit, 1396 const struct tgsi_full_instruction *insn) 1397{ 1398 SVGA3dShaderDestToken temp; 1399 struct src_register one = get_one_immediate(emit); 1400 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_TEXKILL ); 1401 1402 /* texkill doesn't allow negation on the operand so lets move 1403 * negation of {1} to a temp register */ 1404 temp = get_temp( emit ); 1405 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, 1406 negate( one ) )) 1407 return FALSE; 1408 1409 return submit_op0( emit, inst, temp ); 1410} 1411 1412 1413/** 1414 * Test if r1 and r2 are the same register. 1415 */ 1416static boolean 1417same_register(struct src_register r1, struct src_register r2) 1418{ 1419 return (r1.base.num == r2.base.num && 1420 r1.base.type_upper == r2.base.type_upper && 1421 r1.base.type_lower == r2.base.type_lower); 1422} 1423 1424 1425 1426/** 1427 * Implement conditionals by initializing destination reg to 'fail', 1428 * then set predicate reg with UFOP_SETP, then move 'pass' to dest 1429 * based on predicate reg. 1430 * 1431 * SETP src0, cmp, src1 -- do this first to avoid aliasing problems. 
1432 * MOV dst, fail 1433 * MOV dst, pass, p0 1434 */ 1435static boolean 1436emit_conditional(struct svga_shader_emitter *emit, 1437 enum pipe_compare_func compare_func, 1438 SVGA3dShaderDestToken dst, 1439 struct src_register src0, 1440 struct src_register src1, 1441 struct src_register pass, 1442 struct src_register fail) 1443{ 1444 SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 ); 1445 SVGA3dShaderInstToken setp_token; 1446 1447 switch (compare_func) { 1448 case PIPE_FUNC_NEVER: 1449 return submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1450 dst, fail ); 1451 break; 1452 case PIPE_FUNC_LESS: 1453 setp_token = inst_token_setp(SVGA3DOPCOMP_LT); 1454 break; 1455 case PIPE_FUNC_EQUAL: 1456 setp_token = inst_token_setp(SVGA3DOPCOMP_EQ); 1457 break; 1458 case PIPE_FUNC_LEQUAL: 1459 setp_token = inst_token_setp(SVGA3DOPCOMP_LE); 1460 break; 1461 case PIPE_FUNC_GREATER: 1462 setp_token = inst_token_setp(SVGA3DOPCOMP_GT); 1463 break; 1464 case PIPE_FUNC_NOTEQUAL: 1465 setp_token = inst_token_setp(SVGA3DOPCOMPC_NE); 1466 break; 1467 case PIPE_FUNC_GEQUAL: 1468 setp_token = inst_token_setp(SVGA3DOPCOMP_GE); 1469 break; 1470 case PIPE_FUNC_ALWAYS: 1471 return submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1472 dst, pass ); 1473 break; 1474 } 1475 1476 if (same_register(src(dst), pass)) { 1477 /* We'll get bad results if the dst and pass registers are the same 1478 * so use a temp register containing pass. 
1479 */ 1480 SVGA3dShaderDestToken temp = get_temp(emit); 1481 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), temp, pass)) 1482 return FALSE; 1483 pass = src(temp); 1484 } 1485 1486 /* SETP src0, COMPOP, src1 */ 1487 if (!submit_op2( emit, setp_token, pred_reg, 1488 src0, src1 )) 1489 return FALSE; 1490 1491 /* MOV dst, fail */ 1492 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), dst, fail)) 1493 return FALSE; 1494 1495 /* MOV dst, pass (predicated) 1496 * 1497 * Note that the predicate reg (and possible modifiers) is passed 1498 * as the first source argument. 1499 */ 1500 if (!submit_op2(emit, 1501 inst_token_predicated(SVGA3DOP_MOV), dst, 1502 src(pred_reg), pass)) 1503 return FALSE; 1504 1505 return TRUE; 1506} 1507 1508 1509/** 1510 * Helper for emiting 'selection' commands. Basically: 1511 * if (src0 OP src1) 1512 * dst = 1.0; 1513 * else 1514 * dst = 0.0; 1515 */ 1516static boolean 1517emit_select(struct svga_shader_emitter *emit, 1518 enum pipe_compare_func compare_func, 1519 SVGA3dShaderDestToken dst, 1520 struct src_register src0, 1521 struct src_register src1 ) 1522{ 1523 /* There are some SVGA instructions which implement some selects 1524 * directly, but they are only available in the vertex shader. 
1525 */ 1526 if (emit->unit == PIPE_SHADER_VERTEX) { 1527 switch (compare_func) { 1528 case PIPE_FUNC_GEQUAL: 1529 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 ); 1530 case PIPE_FUNC_LEQUAL: 1531 return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 ); 1532 case PIPE_FUNC_GREATER: 1533 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 ); 1534 case PIPE_FUNC_LESS: 1535 return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 ); 1536 default: 1537 break; 1538 } 1539 } 1540 1541 /* Otherwise, need to use the setp approach: 1542 */ 1543 { 1544 struct src_register one, zero; 1545 /* zero immediate is 0,0,0,1 */ 1546 zero = get_zero_immediate(emit); 1547 one = get_one_immediate(emit); 1548 1549 return emit_conditional(emit, compare_func, dst, src0, src1, one, zero); 1550 } 1551} 1552 1553 1554/** 1555 * Translate/emit a TGSI SEQ, SNE, SLT, SGE, etc. instruction. 1556 */ 1557static boolean 1558emit_select_op(struct svga_shader_emitter *emit, 1559 unsigned compare, 1560 const struct tgsi_full_instruction *insn) 1561{ 1562 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1563 struct src_register src0 = translate_src_register( 1564 emit, &insn->Src[0] ); 1565 struct src_register src1 = translate_src_register( 1566 emit, &insn->Src[1] ); 1567 1568 return emit_select( emit, compare, dst, src0, src1 ); 1569} 1570 1571 1572/** 1573 * Translate TGSI CMP instruction. Component-wise: 1574 * dst = (src0 < 0.0) ? 
src1 : src2 1575 */ 1576static boolean 1577emit_cmp(struct svga_shader_emitter *emit, 1578 const struct tgsi_full_instruction *insn) 1579{ 1580 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 1581 const struct src_register src0 = 1582 translate_src_register(emit, &insn->Src[0] ); 1583 const struct src_register src1 = 1584 translate_src_register(emit, &insn->Src[1] ); 1585 const struct src_register src2 = 1586 translate_src_register(emit, &insn->Src[2] ); 1587 1588 if (emit->unit == PIPE_SHADER_VERTEX) { 1589 struct src_register zero = get_zero_immediate(emit); 1590 /* We used to simulate CMP with SLT+LRP. But that didn't work when 1591 * src1 or src2 was Inf/NaN. In particular, GLSL sqrt(0) failed 1592 * because it involves a CMP to handle the 0 case. 1593 * Use a conditional expression instead. 1594 */ 1595 return emit_conditional(emit, PIPE_FUNC_LESS, dst, 1596 src0, zero, src1, src2); 1597 } 1598 else { 1599 assert(emit->unit == PIPE_SHADER_FRAGMENT); 1600 1601 /* CMP DST, SRC0, SRC2, SRC1 */ 1602 return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, 1603 src0, src2, src1); 1604 } 1605} 1606 1607 1608/** 1609 * Translate/emit 2-operand (coord, sampler) texture instructions. 
1610 */ 1611static boolean 1612emit_tex2(struct svga_shader_emitter *emit, 1613 const struct tgsi_full_instruction *insn, 1614 SVGA3dShaderDestToken dst) 1615{ 1616 SVGA3dShaderInstToken inst; 1617 struct src_register texcoord; 1618 struct src_register sampler; 1619 SVGA3dShaderDestToken tmp; 1620 1621 inst.value = 0; 1622 1623 switch (insn->Instruction.Opcode) { 1624 case TGSI_OPCODE_TEX: 1625 inst.op = SVGA3DOP_TEX; 1626 break; 1627 case TGSI_OPCODE_TXP: 1628 inst.op = SVGA3DOP_TEX; 1629 inst.control = SVGA3DOPCONT_PROJECT; 1630 break; 1631 case TGSI_OPCODE_TXB: 1632 inst.op = SVGA3DOP_TEX; 1633 inst.control = SVGA3DOPCONT_BIAS; 1634 break; 1635 case TGSI_OPCODE_TXL: 1636 inst.op = SVGA3DOP_TEXLDL; 1637 break; 1638 default: 1639 assert(0); 1640 return FALSE; 1641 } 1642 1643 texcoord = translate_src_register( emit, &insn->Src[0] ); 1644 sampler = translate_src_register( emit, &insn->Src[1] ); 1645 1646 if (emit->key.tex[sampler.base.num].unnormalized || 1647 emit->dynamic_branching_level > 0) 1648 tmp = get_temp( emit ); 1649 1650 /* Can't do mipmapping inside dynamic branch constructs. Force LOD 1651 * zero in that case. 
1652 */ 1653 if (emit->dynamic_branching_level > 0 && 1654 inst.op == SVGA3DOP_TEX && 1655 SVGA3dShaderGetRegType(texcoord.base.value) == SVGA3DREG_TEMP) { 1656 struct src_register zero = get_zero_immediate(emit); 1657 1658 /* MOV tmp, texcoord */ 1659 if (!submit_op1( emit, 1660 inst_token( SVGA3DOP_MOV ), 1661 tmp, 1662 texcoord )) 1663 return FALSE; 1664 1665 /* MOV tmp.w, zero */ 1666 if (!submit_op1( emit, 1667 inst_token( SVGA3DOP_MOV ), 1668 writemask( tmp, TGSI_WRITEMASK_W ), 1669 zero )) 1670 return FALSE; 1671 1672 texcoord = src( tmp ); 1673 inst.op = SVGA3DOP_TEXLDL; 1674 } 1675 1676 /* Explicit normalization of texcoords: 1677 */ 1678 if (emit->key.tex[sampler.base.num].unnormalized) { 1679 struct src_register wh = get_tex_dimensions( emit, sampler.base.num ); 1680 1681 /* MUL tmp, SRC0, WH */ 1682 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 1683 tmp, texcoord, wh )) 1684 return FALSE; 1685 1686 texcoord = src( tmp ); 1687 } 1688 1689 return submit_op2( emit, inst, dst, texcoord, sampler ); 1690} 1691 1692 1693/** 1694 * Translate/emit 4-operand (coord, ddx, ddy, sampler) texture instructions. 1695 */ 1696static boolean 1697emit_tex4(struct svga_shader_emitter *emit, 1698 const struct tgsi_full_instruction *insn, 1699 SVGA3dShaderDestToken dst ) 1700{ 1701 SVGA3dShaderInstToken inst; 1702 struct src_register texcoord; 1703 struct src_register ddx; 1704 struct src_register ddy; 1705 struct src_register sampler; 1706 1707 texcoord = translate_src_register( emit, &insn->Src[0] ); 1708 ddx = translate_src_register( emit, &insn->Src[1] ); 1709 ddy = translate_src_register( emit, &insn->Src[2] ); 1710 sampler = translate_src_register( emit, &insn->Src[3] ); 1711 1712 inst.value = 0; 1713 1714 switch (insn->Instruction.Opcode) { 1715 case TGSI_OPCODE_TXD: 1716 inst.op = SVGA3DOP_TEXLDD; /* 4 args! 
*/ 1717 break; 1718 default: 1719 assert(0); 1720 return FALSE; 1721 } 1722 1723 return submit_op4( emit, inst, dst, texcoord, sampler, ddx, ddy ); 1724} 1725 1726 1727/** 1728 * Emit texture swizzle code. We do this here since SVGA samplers don't 1729 * directly support swizzles. 1730 */ 1731static boolean 1732emit_tex_swizzle(struct svga_shader_emitter *emit, 1733 SVGA3dShaderDestToken dst, 1734 struct src_register src, 1735 unsigned swizzle_x, 1736 unsigned swizzle_y, 1737 unsigned swizzle_z, 1738 unsigned swizzle_w) 1739{ 1740 const unsigned swizzleIn[4] = {swizzle_x, swizzle_y, swizzle_z, swizzle_w}; 1741 unsigned srcSwizzle[4]; 1742 unsigned srcWritemask = 0x0, zeroWritemask = 0x0, oneWritemask = 0x0; 1743 unsigned i; 1744 1745 /* build writemasks and srcSwizzle terms */ 1746 for (i = 0; i < 4; i++) { 1747 if (swizzleIn[i] == PIPE_SWIZZLE_0) { 1748 srcSwizzle[i] = TGSI_SWIZZLE_X + i; 1749 zeroWritemask |= (1 << i); 1750 } 1751 else if (swizzleIn[i] == PIPE_SWIZZLE_1) { 1752 srcSwizzle[i] = TGSI_SWIZZLE_X + i; 1753 oneWritemask |= (1 << i); 1754 } 1755 else { 1756 srcSwizzle[i] = swizzleIn[i]; 1757 srcWritemask |= (1 << i); 1758 } 1759 } 1760 1761 /* write x/y/z/w comps */ 1762 if (dst.mask & srcWritemask) { 1763 if (!submit_op1(emit, 1764 inst_token(SVGA3DOP_MOV), 1765 writemask(dst, srcWritemask), 1766 swizzle(src, 1767 srcSwizzle[0], 1768 srcSwizzle[1], 1769 srcSwizzle[2], 1770 srcSwizzle[3]))) 1771 return FALSE; 1772 } 1773 1774 /* write 0 comps */ 1775 if (dst.mask & zeroWritemask) { 1776 if (!submit_op1(emit, 1777 inst_token(SVGA3DOP_MOV), 1778 writemask(dst, zeroWritemask), 1779 get_zero_immediate(emit))) 1780 return FALSE; 1781 } 1782 1783 /* write 1 comps */ 1784 if (dst.mask & oneWritemask) { 1785 if (!submit_op1(emit, 1786 inst_token(SVGA3DOP_MOV), 1787 writemask(dst, oneWritemask), 1788 get_one_immediate(emit))) 1789 return FALSE; 1790 } 1791 1792 return TRUE; 1793} 1794 1795 1796/** 1797 * Translate/emit a TGSI texture sample instruction. 
1798 */ 1799static boolean 1800emit_tex(struct svga_shader_emitter *emit, 1801 const struct tgsi_full_instruction *insn) 1802{ 1803 SVGA3dShaderDestToken dst = 1804 translate_dst_register( emit, insn, 0 ); 1805 struct src_register src0 = 1806 translate_src_register( emit, &insn->Src[0] ); 1807 struct src_register src1 = 1808 translate_src_register( emit, &insn->Src[1] ); 1809 1810 SVGA3dShaderDestToken tex_result; 1811 const unsigned unit = src1.base.num; 1812 1813 /* check for shadow samplers */ 1814 boolean compare = (emit->key.tex[unit].compare_mode == 1815 PIPE_TEX_COMPARE_R_TO_TEXTURE); 1816 1817 /* texture swizzle */ 1818 boolean swizzle = (emit->key.tex[unit].swizzle_r != PIPE_SWIZZLE_X || 1819 emit->key.tex[unit].swizzle_g != PIPE_SWIZZLE_Y || 1820 emit->key.tex[unit].swizzle_b != PIPE_SWIZZLE_Z || 1821 emit->key.tex[unit].swizzle_a != PIPE_SWIZZLE_W); 1822 1823 boolean saturate = insn->Instruction.Saturate; 1824 1825 /* If doing compare processing or tex swizzle or saturation, we need to put 1826 * the fetched color into a temporary so it can be used as a source later on. 1827 */ 1828 if (compare || swizzle || saturate) { 1829 tex_result = get_temp( emit ); 1830 } 1831 else { 1832 tex_result = dst; 1833 } 1834 1835 switch(insn->Instruction.Opcode) { 1836 case TGSI_OPCODE_TEX: 1837 case TGSI_OPCODE_TXB: 1838 case TGSI_OPCODE_TXP: 1839 case TGSI_OPCODE_TXL: 1840 if (!emit_tex2( emit, insn, tex_result )) 1841 return FALSE; 1842 break; 1843 case TGSI_OPCODE_TXD: 1844 if (!emit_tex4( emit, insn, tex_result )) 1845 return FALSE; 1846 break; 1847 default: 1848 assert(0); 1849 } 1850 1851 if (compare) { 1852 SVGA3dShaderDestToken dst2; 1853 1854 if (swizzle || saturate) 1855 dst2 = tex_result; 1856 else 1857 dst2 = dst; 1858 1859 if (dst.mask & TGSI_WRITEMASK_XYZ) { 1860 SVGA3dShaderDestToken src0_zdivw = get_temp( emit ); 1861 /* When sampling a depth texture, the result of the comparison is in 1862 * the Y component. 
1863 */ 1864 struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y); 1865 struct src_register r_coord; 1866 1867 if (insn->Instruction.Opcode == TGSI_OPCODE_TXP) { 1868 /* Divide texcoord R by Q */ 1869 if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), 1870 writemask(src0_zdivw, TGSI_WRITEMASK_X), 1871 scalar(src0, TGSI_SWIZZLE_W) )) 1872 return FALSE; 1873 1874 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 1875 writemask(src0_zdivw, TGSI_WRITEMASK_X), 1876 scalar(src0, TGSI_SWIZZLE_Z), 1877 scalar(src(src0_zdivw), TGSI_SWIZZLE_X) )) 1878 return FALSE; 1879 1880 r_coord = scalar(src(src0_zdivw), TGSI_SWIZZLE_X); 1881 } 1882 else { 1883 r_coord = scalar(src0, TGSI_SWIZZLE_Z); 1884 } 1885 1886 /* Compare texture sample value against R component of texcoord */ 1887 if (!emit_select(emit, 1888 emit->key.tex[unit].compare_func, 1889 writemask( dst2, TGSI_WRITEMASK_XYZ ), 1890 r_coord, 1891 tex_src_x)) 1892 return FALSE; 1893 } 1894 1895 if (dst.mask & TGSI_WRITEMASK_W) { 1896 struct src_register one = get_one_immediate(emit); 1897 1898 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 1899 writemask( dst2, TGSI_WRITEMASK_W ), 1900 one )) 1901 return FALSE; 1902 } 1903 } 1904 1905 if (saturate && !swizzle) { 1906 /* MOV_SAT real_dst, dst */ 1907 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) )) 1908 return FALSE; 1909 } 1910 else if (swizzle) { 1911 /* swizzle from tex_result to dst (handles saturation too, if any) */ 1912 emit_tex_swizzle(emit, 1913 dst, src(tex_result), 1914 emit->key.tex[unit].swizzle_r, 1915 emit->key.tex[unit].swizzle_g, 1916 emit->key.tex[unit].swizzle_b, 1917 emit->key.tex[unit].swizzle_a); 1918 } 1919 1920 return TRUE; 1921} 1922 1923 1924static boolean 1925emit_bgnloop(struct svga_shader_emitter *emit, 1926 const struct tgsi_full_instruction *insn) 1927{ 1928 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP ); 1929 struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 ); 1930 struct 
src_register const_int = get_loop_const( emit ); 1931 1932 emit->dynamic_branching_level++; 1933 1934 return (emit_instruction( emit, inst ) && 1935 emit_src( emit, loop_reg ) && 1936 emit_src( emit, const_int ) ); 1937} 1938 1939 1940static boolean 1941emit_endloop(struct svga_shader_emitter *emit, 1942 const struct tgsi_full_instruction *insn) 1943{ 1944 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP ); 1945 1946 emit->dynamic_branching_level--; 1947 1948 return emit_instruction( emit, inst ); 1949} 1950 1951 1952/** 1953 * Translate/emit TGSI BREAK (out of loop) instruction. 1954 */ 1955static boolean 1956emit_brk(struct svga_shader_emitter *emit, 1957 const struct tgsi_full_instruction *insn) 1958{ 1959 SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK ); 1960 return emit_instruction( emit, inst ); 1961} 1962 1963 1964/** 1965 * Emit simple instruction which operates on one scalar value (not 1966 * a vector). Ex: LG2, RCP, RSQ. 1967 */ 1968static boolean 1969emit_scalar_op1(struct svga_shader_emitter *emit, 1970 SVGA3dShaderOpCodeType opcode, 1971 const struct tgsi_full_instruction *insn) 1972{ 1973 SVGA3dShaderInstToken inst; 1974 SVGA3dShaderDestToken dst; 1975 struct src_register src; 1976 1977 inst = inst_token( opcode ); 1978 dst = translate_dst_register( emit, insn, 0 ); 1979 src = translate_src_register( emit, &insn->Src[0] ); 1980 src = scalar( src, TGSI_SWIZZLE_X ); 1981 1982 return submit_op1( emit, inst, dst, src ); 1983} 1984 1985 1986/** 1987 * Translate/emit a simple instruction (one which has no special-case 1988 * code) such as ADD, MUL, MIN, MAX. 
1989 */ 1990static boolean 1991emit_simple_instruction(struct svga_shader_emitter *emit, 1992 SVGA3dShaderOpCodeType opcode, 1993 const struct tgsi_full_instruction *insn) 1994{ 1995 const struct tgsi_full_src_register *src = insn->Src; 1996 SVGA3dShaderInstToken inst; 1997 SVGA3dShaderDestToken dst; 1998 1999 inst = inst_token( opcode ); 2000 dst = translate_dst_register( emit, insn, 0 ); 2001 2002 switch (insn->Instruction.NumSrcRegs) { 2003 case 0: 2004 return submit_op0( emit, inst, dst ); 2005 case 1: 2006 return submit_op1( emit, inst, dst, 2007 translate_src_register( emit, &src[0] )); 2008 case 2: 2009 return submit_op2( emit, inst, dst, 2010 translate_src_register( emit, &src[0] ), 2011 translate_src_register( emit, &src[1] ) ); 2012 case 3: 2013 return submit_op3( emit, inst, dst, 2014 translate_src_register( emit, &src[0] ), 2015 translate_src_register( emit, &src[1] ), 2016 translate_src_register( emit, &src[2] ) ); 2017 default: 2018 assert(0); 2019 return FALSE; 2020 } 2021} 2022 2023 2024/** 2025 * TGSI_OPCODE_MOVE is only special-cased here to detect the 2026 * svga_fragment_shader::constant_color_output case. 
2027 */ 2028static boolean 2029emit_mov(struct svga_shader_emitter *emit, 2030 const struct tgsi_full_instruction *insn) 2031{ 2032 const struct tgsi_full_src_register *src = &insn->Src[0]; 2033 const struct tgsi_full_dst_register *dst = &insn->Dst[0]; 2034 2035 if (emit->unit == PIPE_SHADER_FRAGMENT && 2036 dst->Register.File == TGSI_FILE_OUTPUT && 2037 dst->Register.Index == 0 && 2038 src->Register.File == TGSI_FILE_CONSTANT && 2039 !src->Register.Indirect) { 2040 emit->constant_color_output = TRUE; 2041 } 2042 2043 return emit_simple_instruction(emit, SVGA3DOP_MOV, insn); 2044} 2045 2046 2047/** 2048 * Translate TGSI SQRT instruction 2049 * if src1 == 0 2050 * mov dst, src1 2051 * else 2052 * rsq temp, src1 2053 * rcp dst, temp 2054 * endif 2055 */ 2056static boolean 2057emit_sqrt(struct svga_shader_emitter *emit, 2058 const struct tgsi_full_instruction *insn) 2059{ 2060 const struct src_register src1 = translate_src_register(emit, &insn->Src[0]); 2061 const struct src_register zero = get_zero_immediate(emit); 2062 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); 2063 SVGA3dShaderDestToken temp = get_temp(emit); 2064 SVGA3dShaderInstToken if_token = inst_token(SVGA3DOP_IFC); 2065 boolean ret = TRUE; 2066 2067 if_token.control = SVGA3DOPCOMP_EQ; 2068 2069 if (!(emit_instruction(emit, if_token) && 2070 emit_src(emit, src1) && 2071 emit_src(emit, zero))) { 2072 ret = FALSE; 2073 goto cleanup; 2074 } 2075 2076 if (!submit_op1(emit, 2077 inst_token(SVGA3DOP_MOV), 2078 dst, src1)) { 2079 ret = FALSE; 2080 goto cleanup; 2081 } 2082 2083 if (!emit_instruction(emit, inst_token(SVGA3DOP_ELSE))) { 2084 ret = FALSE; 2085 goto cleanup; 2086 } 2087 2088 if (!submit_op1(emit, 2089 inst_token(SVGA3DOP_RSQ), 2090 temp, src1)) { 2091 ret = FALSE; 2092 goto cleanup; 2093 } 2094 2095 if (!submit_op1(emit, 2096 inst_token(SVGA3DOP_RCP), 2097 dst, src(temp))) { 2098 ret = FALSE; 2099 goto cleanup; 2100 } 2101 2102 if (!emit_instruction(emit, 
inst_token(SVGA3DOP_ENDIF))) { 2103 ret = FALSE; 2104 goto cleanup; 2105 } 2106 2107cleanup: 2108 release_temp(emit, temp); 2109 2110 return ret; 2111} 2112 2113 2114/** 2115 * Translate/emit TGSI DDX, DDY instructions. 2116 */ 2117static boolean 2118emit_deriv(struct svga_shader_emitter *emit, 2119 const struct tgsi_full_instruction *insn ) 2120{ 2121 if (emit->dynamic_branching_level > 0 && 2122 insn->Src[0].Register.File == TGSI_FILE_TEMPORARY) 2123 { 2124 SVGA3dShaderDestToken dst = 2125 translate_dst_register( emit, insn, 0 ); 2126 2127 /* Deriv opcodes not valid inside dynamic branching, workaround 2128 * by zeroing out the destination. 2129 */ 2130 if (!submit_op1(emit, 2131 inst_token( SVGA3DOP_MOV ), 2132 dst, 2133 get_zero_immediate(emit))) 2134 return FALSE; 2135 2136 return TRUE; 2137 } 2138 else { 2139 SVGA3dShaderOpCodeType opcode; 2140 const struct tgsi_full_src_register *reg = &insn->Src[0]; 2141 SVGA3dShaderInstToken inst; 2142 SVGA3dShaderDestToken dst; 2143 struct src_register src0; 2144 2145 switch (insn->Instruction.Opcode) { 2146 case TGSI_OPCODE_DDX: 2147 opcode = SVGA3DOP_DSX; 2148 break; 2149 case TGSI_OPCODE_DDY: 2150 opcode = SVGA3DOP_DSY; 2151 break; 2152 default: 2153 return FALSE; 2154 } 2155 2156 inst = inst_token( opcode ); 2157 dst = translate_dst_register( emit, insn, 0 ); 2158 src0 = translate_src_register( emit, reg ); 2159 2160 /* We cannot use negate or abs on source to dsx/dsy instruction. 2161 */ 2162 if (reg->Register.Absolute || 2163 reg->Register.Negate) { 2164 SVGA3dShaderDestToken temp = get_temp( emit ); 2165 2166 if (!emit_repl( emit, temp, &src0 )) 2167 return FALSE; 2168 } 2169 2170 return submit_op1( emit, inst, dst, src0 ); 2171 } 2172} 2173 2174 2175/** 2176 * Translate/emit ARL (Address Register Load) instruction. Used to 2177 * move a value into the special 'address' register. Used to implement 2178 * indirect/variable indexing into arrays. 
2179 */ 2180static boolean 2181emit_arl(struct svga_shader_emitter *emit, 2182 const struct tgsi_full_instruction *insn) 2183{ 2184 ++emit->current_arl; 2185 if (emit->unit == PIPE_SHADER_FRAGMENT) { 2186 /* MOVA not present in pixel shader instruction set. 2187 * Ignore this instruction altogether since it is 2188 * only used for loop counters -- and for that 2189 * we reference aL directly. 2190 */ 2191 return TRUE; 2192 } 2193 if (svga_arl_needs_adjustment( emit )) { 2194 return emit_fake_arl( emit, insn ); 2195 } else { 2196 /* no need to adjust, just emit straight arl */ 2197 return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn); 2198 } 2199} 2200 2201 2202static boolean 2203emit_pow(struct svga_shader_emitter *emit, 2204 const struct tgsi_full_instruction *insn) 2205{ 2206 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2207 struct src_register src0 = translate_src_register( 2208 emit, &insn->Src[0] ); 2209 struct src_register src1 = translate_src_register( 2210 emit, &insn->Src[1] ); 2211 boolean need_tmp = FALSE; 2212 2213 /* POW can only output to a temporary */ 2214 if (insn->Dst[0].Register.File != TGSI_FILE_TEMPORARY) 2215 need_tmp = TRUE; 2216 2217 /* POW src1 must not be the same register as dst */ 2218 if (alias_src_dst( src1, dst )) 2219 need_tmp = TRUE; 2220 2221 /* it's a scalar op */ 2222 src0 = scalar( src0, TGSI_SWIZZLE_X ); 2223 src1 = scalar( src1, TGSI_SWIZZLE_X ); 2224 2225 if (need_tmp) { 2226 SVGA3dShaderDestToken tmp = 2227 writemask(get_temp( emit ), TGSI_WRITEMASK_X ); 2228 2229 if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1)) 2230 return FALSE; 2231 2232 return submit_op1(emit, inst_token( SVGA3DOP_MOV ), 2233 dst, scalar(src(tmp), 0) ); 2234 } 2235 else { 2236 return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1); 2237 } 2238} 2239 2240 2241/** 2242 * Emit a LRP (linear interpolation) instruction. 
2243 */ 2244static boolean 2245submit_lrp(struct svga_shader_emitter *emit, 2246 SVGA3dShaderDestToken dst, 2247 struct src_register src0, 2248 struct src_register src1, 2249 struct src_register src2) 2250{ 2251 SVGA3dShaderDestToken tmp; 2252 boolean need_dst_tmp = FALSE; 2253 2254 /* The dst reg must be a temporary, and not be the same as src0 or src2 */ 2255 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || 2256 alias_src_dst(src0, dst) || 2257 alias_src_dst(src2, dst)) 2258 need_dst_tmp = TRUE; 2259 2260 if (need_dst_tmp) { 2261 tmp = get_temp( emit ); 2262 tmp.mask = dst.mask; 2263 } 2264 else { 2265 tmp = dst; 2266 } 2267 2268 if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2)) 2269 return FALSE; 2270 2271 if (need_dst_tmp) { 2272 if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp ))) 2273 return FALSE; 2274 } 2275 2276 return TRUE; 2277} 2278 2279 2280/** 2281 * Translate/emit LRP (Linear Interpolation) instruction. 2282 */ 2283static boolean 2284emit_lrp(struct svga_shader_emitter *emit, 2285 const struct tgsi_full_instruction *insn) 2286{ 2287 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2288 const struct src_register src0 = translate_src_register( 2289 emit, &insn->Src[0] ); 2290 const struct src_register src1 = translate_src_register( 2291 emit, &insn->Src[1] ); 2292 const struct src_register src2 = translate_src_register( 2293 emit, &insn->Src[2] ); 2294 2295 return submit_lrp(emit, dst, src0, src1, src2); 2296} 2297 2298/** 2299 * Translate/emit DST (Distance function) instruction. 
2300 */ 2301static boolean 2302emit_dst_insn(struct svga_shader_emitter *emit, 2303 const struct tgsi_full_instruction *insn) 2304{ 2305 if (emit->unit == PIPE_SHADER_VERTEX) { 2306 /* SVGA/DX9 has a DST instruction, but only for vertex shaders: 2307 */ 2308 return emit_simple_instruction(emit, SVGA3DOP_DST, insn); 2309 } 2310 else { 2311 /* result[0] = 1 * 1; 2312 * result[1] = a[1] * b[1]; 2313 * result[2] = a[2] * 1; 2314 * result[3] = 1 * b[3]; 2315 */ 2316 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2317 SVGA3dShaderDestToken tmp; 2318 const struct src_register src0 = translate_src_register( 2319 emit, &insn->Src[0] ); 2320 const struct src_register src1 = translate_src_register( 2321 emit, &insn->Src[1] ); 2322 boolean need_tmp = FALSE; 2323 2324 if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP || 2325 alias_src_dst(src0, dst) || 2326 alias_src_dst(src1, dst)) 2327 need_tmp = TRUE; 2328 2329 if (need_tmp) { 2330 tmp = get_temp( emit ); 2331 } 2332 else { 2333 tmp = dst; 2334 } 2335 2336 /* tmp.xw = 1.0 2337 */ 2338 if (tmp.mask & TGSI_WRITEMASK_XW) { 2339 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2340 writemask(tmp, TGSI_WRITEMASK_XW ), 2341 get_one_immediate(emit))) 2342 return FALSE; 2343 } 2344 2345 /* tmp.yz = src0 2346 */ 2347 if (tmp.mask & TGSI_WRITEMASK_YZ) { 2348 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2349 writemask(tmp, TGSI_WRITEMASK_YZ ), 2350 src0)) 2351 return FALSE; 2352 } 2353 2354 /* tmp.yw = tmp * src1 2355 */ 2356 if (tmp.mask & TGSI_WRITEMASK_YW) { 2357 if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 2358 writemask(tmp, TGSI_WRITEMASK_YW ), 2359 src(tmp), 2360 src1)) 2361 return FALSE; 2362 } 2363 2364 /* dst = tmp 2365 */ 2366 if (need_tmp) { 2367 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2368 dst, 2369 src(tmp))) 2370 return FALSE; 2371 } 2372 } 2373 2374 return TRUE; 2375} 2376 2377 2378static boolean 2379emit_exp(struct svga_shader_emitter *emit, 2380 const struct 
tgsi_full_instruction *insn) 2381{ 2382 SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 ); 2383 struct src_register src0 = 2384 translate_src_register( emit, &insn->Src[0] ); 2385 SVGA3dShaderDestToken fraction; 2386 2387 if (dst.mask & TGSI_WRITEMASK_Y) 2388 fraction = dst; 2389 else if (dst.mask & TGSI_WRITEMASK_X) 2390 fraction = get_temp( emit ); 2391 else 2392 fraction.value = 0; 2393 2394 /* If y is being written, fill it with src0 - floor(src0). 2395 */ 2396 if (dst.mask & TGSI_WRITEMASK_XY) { 2397 if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), 2398 writemask( fraction, TGSI_WRITEMASK_Y ), 2399 src0 )) 2400 return FALSE; 2401 } 2402 2403 /* If x is being written, fill it with 2 ^ floor(src0). 2404 */ 2405 if (dst.mask & TGSI_WRITEMASK_X) { 2406 if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), 2407 writemask( dst, TGSI_WRITEMASK_X ), 2408 src0, 2409 scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) ) 2410 return FALSE; 2411 2412 if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ), 2413 writemask( dst, TGSI_WRITEMASK_X ), 2414 scalar( src( dst ), TGSI_SWIZZLE_X ) ) ) 2415 return FALSE; 2416 2417 if (!(dst.mask & TGSI_WRITEMASK_Y)) 2418 release_temp( emit, fraction ); 2419 } 2420 2421 /* If z is being written, fill it with 2 ^ src0 (partial precision). 2422 */ 2423 if (dst.mask & TGSI_WRITEMASK_Z) { 2424 if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ), 2425 writemask( dst, TGSI_WRITEMASK_Z ), 2426 src0 ) ) 2427 return FALSE; 2428 } 2429 2430 /* If w is being written, fill it with one. 2431 */ 2432 if (dst.mask & TGSI_WRITEMASK_W) { 2433 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 2434 writemask(dst, TGSI_WRITEMASK_W), 2435 get_one_immediate(emit))) 2436 return FALSE; 2437 } 2438 2439 return TRUE; 2440} 2441 2442 2443/** 2444 * Translate/emit LIT (Lighting helper) instruction. 
*/
static boolean
emit_lit(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
       */
      return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
   }
   else {
      /* D3D vs. GL semantics can be fairly easily accommodated by
       * variations on this sequence.
       *
       * GL:
       *   tmp.y = src.x
       *   tmp.z = pow(src.y,src.w)
       *   p0 = src0.xxxx > 0
       *   result = zero.wxxw
       *   (p0) result.yz = tmp
       *
       * D3D:
       *   tmp.y = src.x
       *   tmp.z = pow(src.y,src.w)
       *   p0 = src0.xxyy > 0
       *   result = zero.wxxw
       *   (p0) result.yz = tmp
       *
       * Will implement the GL version for now.
       */
      SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
      SVGA3dShaderDestToken tmp = get_temp( emit );
      const struct src_register src0 = translate_src_register(
         emit, &insn->Src[0] );

      /* tmp = pow(src.y, src.w)
       *
       * Only needed when z is written.  No writemask is applied to tmp
       * here; tmp.y is overwritten by the MOV below when y is written.
       */
      if (dst.mask & TGSI_WRITEMASK_Z) {
         if (!submit_op2(emit, inst_token( SVGA3DOP_POW ),
                         tmp,
                         scalar(src0, 1),
                         scalar(src0, 3)))
            return FALSE;
      }

      /* tmp.y = src.x
       */
      if (dst.mask & TGSI_WRITEMASK_Y) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          writemask(tmp, TGSI_WRITEMASK_Y ),
                          scalar(src0, 0)))
            return FALSE;
      }

      /* Can't quite do this with emit conditional due to the extra
       * writemask on the predicated mov:
       */
      {
         SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
         struct src_register predsrc;

         /* D3D vs GL semantics: the D3D variant is compiled out, so the
          * predicate is always computed from src0.xxxx.
          */
         if (0)
            predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
         else
            predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */

         /* SETP predsrc, GT, {0}.x   (predsrc is src0.xxxx, see above) */
         if (!submit_op2( emit,
                          inst_token_setp(SVGA3DOPCOMP_GT),
                          pred_reg,
                          predsrc,
                          get_zero_immediate(emit)))
            return FALSE;

         /* MOV dst, fail -- unconditionally write the "fail" value
          * {1, 0, 0, 1}; y/z are conditionally replaced below.
          */
         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
                          get_immediate(emit, 1.0f, 0.0f, 0.0f, 1.0f)))
            return FALSE;

         /* MOV dst.yz, tmp (predicated)
          *
          * Note that the predicate reg (and possible modifiers) is passed
          * as the first source argument.
          */
         if (dst.mask & TGSI_WRITEMASK_YZ) {
            if (!submit_op2( emit,
                             inst_token_predicated(SVGA3DOP_MOV),
                             writemask(dst, TGSI_WRITEMASK_YZ),
                             src( pred_reg ), src( tmp ) ))
               return FALSE;
         }
      }
   }

   return TRUE;
}


/**
 * Translate/emit the TGSI EX2 instruction using SVGA3DOP_EXP on the
 * X component of the source.
 *
 * When the destination writemask is not XYZW the result is first
 * computed into a scratch temp and then copied to the destination with
 * a MOV of the temp's X component; otherwise EXP writes the destination
 * directly.
 *
 * \return TRUE on success, FALSE if emitting failed.
 */
static boolean
emit_ex2(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderInstToken inst;
   SVGA3dShaderDestToken dst;
   struct src_register src0;

   inst = inst_token( SVGA3DOP_EXP );
   dst = translate_dst_register( emit, insn, 0 );
   src0 = translate_src_register( emit, &insn->Src[0] );
   /* Only the X component of the source is used. */
   src0 = scalar( src0, TGSI_SWIZZLE_X );

   if (dst.mask != TGSI_WRITEMASK_XYZW) {
      /* Partial writemask: go through a temp, then MOV tmp.x to dst. */
      SVGA3dShaderDestToken tmp = get_temp( emit );

      if (!submit_op1( emit, inst, tmp, src0 ))
         return FALSE;

      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                         dst,
                         scalar( src( tmp ), TGSI_SWIZZLE_X ) );
   }

   return submit_op1( emit, inst, dst, src0 );
}


/**
 * Translate/emit the TGSI LOG instruction.
 *
 * Per enabled destination channel (as stated by the comments below):
 *   z: log2(abs(src0.x))
 *   x: floor(log2(abs(src0.x)))
 *   y: abs(src0.x) / 2^floor(log2(abs(src0.x)))
 *   w: 1.0
 * x and y are derived from the z value, so that value is computed
 * whenever any of x, y or z is written.  Scratch temps stand in for
 * destination channels that are needed as intermediates but are not
 * written, and are released once no longer needed.
 *
 * \return TRUE on success, FALSE if emitting failed.
 */
static boolean
emit_log(struct svga_shader_emitter *emit,
         const struct tgsi_full_instruction *insn)
{
   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
   struct src_register src0 =
      translate_src_register( emit, &insn->Src[0] );
   SVGA3dShaderDestToken abs_tmp;
   struct src_register abs_src0;
   SVGA3dShaderDestToken log2_abs;

   abs_tmp.value = 0;

   /* Register holding log2(abs(src0)) in its Z channel: dst itself when
    * z is written, otherwise a scratch temp if x or y need the value.
    */
   if (dst.mask & TGSI_WRITEMASK_Z)
      log2_abs = dst;
   else if (dst.mask & TGSI_WRITEMASK_XY)
      log2_abs = get_temp( emit );
   else
      log2_abs.value = 0;

   /* If any of x, y, z is being written, compute log2( abs( src0 ) )
    * into the Z channel of log2_abs.
    */
   if (dst.mask & TGSI_WRITEMASK_XYZ) {
      if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
         abs_src0 = src0;
      else {
         /* src0 carries some other source modifier: copy it to a temp
          * first so the abs modifier can then be applied to the copy.
          */
         abs_tmp = get_temp( emit );

         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                          abs_tmp,
                          src0 ) )
            return FALSE;

         abs_src0 = src( abs_tmp );
      }

      abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );

      if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
                       writemask( log2_abs, TGSI_WRITEMASK_Z ),
                       abs_src0 ) )
         return FALSE;
   }

   if (dst.mask & TGSI_WRITEMASK_XY) {
      SVGA3dShaderDestToken floor_log2;

      /* The floor value goes to dst.x when x is written, otherwise to
       * a scratch temp (it is still needed to compute y).
       */
      if (dst.mask & TGSI_WRITEMASK_X)
         floor_log2 = dst;
      else
         floor_log2 = get_temp( emit );

      /* floor_log2.x = floor( log2( abs( src0 ) ) ), computed as
       * value - frac(value) using FRC followed by ADD.
       */
      if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
                       writemask( floor_log2, TGSI_WRITEMASK_X ),
                       scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
         return FALSE;

      if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
                       writemask( floor_log2, TGSI_WRITEMASK_X ),
                       scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
                       negate( src( floor_log2 ) ) ) )
         return FALSE;

      /* If y is being written, fill it with
       * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
       * This is emitted as EXP of the negated floor value followed by
       * a MUL with abs(src0).
       */
      if (dst.mask & TGSI_WRITEMASK_Y) {
         if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
                          writemask( dst, TGSI_WRITEMASK_Y ),
                          negate( scalar( src( floor_log2 ),
                                          TGSI_SWIZZLE_X ) ) ) )
            return FALSE;

         if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
                          writemask( dst, TGSI_WRITEMASK_Y ),
                          src( dst ),
                          abs_src0 ) )
            return FALSE;
      }

      /* Release the scratch temps that stood in for unwritten channels. */
      if (!(dst.mask & TGSI_WRITEMASK_X))
         release_temp( emit, floor_log2 );

      if (!(dst.mask & TGSI_WRITEMASK_Z))
         release_temp( emit, log2_abs );
   }

   /* Release abs_tmp under the same condition it was allocated above. */
   if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
       src0.base.srcMod != SVGA3DSRCMOD_ABS)
      release_temp( emit, abs_tmp );

   /* If w is being written, fill it with one.
    */
   if (dst.mask & TGSI_WRITEMASK_W) {
      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
                       writemask(dst, TGSI_WRITEMASK_W),
                       get_one_immediate(emit)))
         return FALSE;
   }

   return TRUE;
}


/**
 * Translate TGSI TRUNC or ROUND instruction.
 * We need to truncate toward zero. Ex: trunc(-1.9) = -1
 * Different approaches are needed for VS versus PS.
2683 */ 2684static boolean 2685emit_trunc_round(struct svga_shader_emitter *emit, 2686 const struct tgsi_full_instruction *insn, 2687 boolean round) 2688{ 2689 SVGA3dShaderDestToken dst = translate_dst_register(emit, insn, 0); 2690 const struct src_register src0 = 2691 translate_src_register(emit, &insn->Src[0] ); 2692 SVGA3dShaderDestToken t1 = get_temp(emit); 2693 2694 if (round) { 2695 SVGA3dShaderDestToken t0 = get_temp(emit); 2696 struct src_register half = get_half_immediate(emit); 2697 2698 /* t0 = abs(src0) + 0.5 */ 2699 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t0, 2700 absolute(src0), half)) 2701 return FALSE; 2702 2703 /* t1 = fract(t0) */ 2704 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, src(t0))) 2705 return FALSE; 2706 2707 /* t1 = t0 - t1 */ 2708 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, src(t0), 2709 negate(src(t1)))) 2710 return FALSE; 2711 } 2712 else { 2713 /* trunc */ 2714 2715 /* t1 = fract(abs(src0)) */ 2716 if (!submit_op1(emit, inst_token(SVGA3DOP_FRC), t1, absolute(src0))) 2717 return FALSE; 2718 2719 /* t1 = abs(src0) - t1 */ 2720 if (!submit_op2(emit, inst_token(SVGA3DOP_ADD), t1, absolute(src0), 2721 negate(src(t1)))) 2722 return FALSE; 2723 } 2724 2725 /* 2726 * Now we need to multiply t1 by the sign of the original value. 
2727 */ 2728 if (emit->unit == PIPE_SHADER_VERTEX) { 2729 /* For VS: use SGN instruction */ 2730 /* Need two extra/dummy registers: */ 2731 SVGA3dShaderDestToken t2 = get_temp(emit), t3 = get_temp(emit), 2732 t4 = get_temp(emit); 2733 2734 /* t2 = sign(src0) */ 2735 if (!submit_op3(emit, inst_token(SVGA3DOP_SGN), t2, src0, 2736 src(t3), src(t4))) 2737 return FALSE; 2738 2739 /* dst = t1 * t2 */ 2740 if (!submit_op2(emit, inst_token(SVGA3DOP_MUL), dst, src(t1), src(t2))) 2741 return FALSE; 2742 } 2743 else { 2744 /* For FS: Use CMP instruction */ 2745 return submit_op3(emit, inst_token( SVGA3DOP_CMP ), dst, 2746 src0, src(t1), negate(src(t1))); 2747 } 2748 2749 return TRUE; 2750} 2751 2752 2753/** 2754 * Translate/emit "begin subroutine" instruction/marker/label. 2755 */ 2756static boolean 2757emit_bgnsub(struct svga_shader_emitter *emit, 2758 unsigned position, 2759 const struct tgsi_full_instruction *insn) 2760{ 2761 unsigned i; 2762 2763 /* Note that we've finished the main function and are now emitting 2764 * subroutines. This affects how we terminate the generated 2765 * shader. 2766 */ 2767 emit->in_main_func = FALSE; 2768 2769 for (i = 0; i < emit->nr_labels; i++) { 2770 if (emit->label[i] == position) { 2771 return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) && 2772 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) && 2773 emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); 2774 } 2775 } 2776 2777 assert(0); 2778 return TRUE; 2779} 2780 2781 2782/** 2783 * Translate/emit subroutine call instruction. 
2784 */ 2785static boolean 2786emit_call(struct svga_shader_emitter *emit, 2787 const struct tgsi_full_instruction *insn) 2788{ 2789 unsigned position = insn->Label.Label; 2790 unsigned i; 2791 2792 for (i = 0; i < emit->nr_labels; i++) { 2793 if (emit->label[i] == position) 2794 break; 2795 } 2796 2797 if (emit->nr_labels == ARRAY_SIZE(emit->label)) 2798 return FALSE; 2799 2800 if (i == emit->nr_labels) { 2801 emit->label[i] = position; 2802 emit->nr_labels++; 2803 } 2804 2805 return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) && 2806 emit_src( emit, src_register( SVGA3DREG_LABEL, i ))); 2807} 2808 2809 2810/** 2811 * Called at the end of the shader. Actually, emit special "fix-up" 2812 * code for the vertex/fragment shader. 2813 */ 2814static boolean 2815emit_end(struct svga_shader_emitter *emit) 2816{ 2817 if (emit->unit == PIPE_SHADER_VERTEX) { 2818 return emit_vs_postamble( emit ); 2819 } 2820 else { 2821 return emit_ps_postamble( emit ); 2822 } 2823} 2824 2825 2826/** 2827 * Translate any TGSI instruction to SVGA. 
2828 */ 2829static boolean 2830svga_emit_instruction(struct svga_shader_emitter *emit, 2831 unsigned position, 2832 const struct tgsi_full_instruction *insn) 2833{ 2834 switch (insn->Instruction.Opcode) { 2835 2836 case TGSI_OPCODE_ARL: 2837 return emit_arl( emit, insn ); 2838 2839 case TGSI_OPCODE_TEX: 2840 case TGSI_OPCODE_TXB: 2841 case TGSI_OPCODE_TXP: 2842 case TGSI_OPCODE_TXL: 2843 case TGSI_OPCODE_TXD: 2844 return emit_tex( emit, insn ); 2845 2846 case TGSI_OPCODE_DDX: 2847 case TGSI_OPCODE_DDY: 2848 return emit_deriv( emit, insn ); 2849 2850 case TGSI_OPCODE_BGNSUB: 2851 return emit_bgnsub( emit, position, insn ); 2852 2853 case TGSI_OPCODE_ENDSUB: 2854 return TRUE; 2855 2856 case TGSI_OPCODE_CAL: 2857 return emit_call( emit, insn ); 2858 2859 case TGSI_OPCODE_FLR: 2860 return emit_floor( emit, insn ); 2861 2862 case TGSI_OPCODE_TRUNC: 2863 return emit_trunc_round( emit, insn, FALSE ); 2864 2865 case TGSI_OPCODE_ROUND: 2866 return emit_trunc_round( emit, insn, TRUE ); 2867 2868 case TGSI_OPCODE_CEIL: 2869 return emit_ceil( emit, insn ); 2870 2871 case TGSI_OPCODE_CMP: 2872 return emit_cmp( emit, insn ); 2873 2874 case TGSI_OPCODE_DIV: 2875 return emit_div( emit, insn ); 2876 2877 case TGSI_OPCODE_DP2: 2878 return emit_dp2( emit, insn ); 2879 2880 case TGSI_OPCODE_COS: 2881 return emit_cos( emit, insn ); 2882 2883 case TGSI_OPCODE_SIN: 2884 return emit_sin( emit, insn ); 2885 2886 case TGSI_OPCODE_END: 2887 /* TGSI always finishes the main func with an END */ 2888 return emit_end( emit ); 2889 2890 case TGSI_OPCODE_KILL_IF: 2891 return emit_cond_discard( emit, insn ); 2892 2893 /* Selection opcodes. The underlying language is fairly 2894 * non-orthogonal about these. 
2895 */ 2896 case TGSI_OPCODE_SEQ: 2897 return emit_select_op( emit, PIPE_FUNC_EQUAL, insn ); 2898 2899 case TGSI_OPCODE_SNE: 2900 return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn ); 2901 2902 case TGSI_OPCODE_SGT: 2903 return emit_select_op( emit, PIPE_FUNC_GREATER, insn ); 2904 2905 case TGSI_OPCODE_SGE: 2906 return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn ); 2907 2908 case TGSI_OPCODE_SLT: 2909 return emit_select_op( emit, PIPE_FUNC_LESS, insn ); 2910 2911 case TGSI_OPCODE_SLE: 2912 return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn ); 2913 2914 case TGSI_OPCODE_POW: 2915 return emit_pow( emit, insn ); 2916 2917 case TGSI_OPCODE_EX2: 2918 return emit_ex2( emit, insn ); 2919 2920 case TGSI_OPCODE_EXP: 2921 return emit_exp( emit, insn ); 2922 2923 case TGSI_OPCODE_LOG: 2924 return emit_log( emit, insn ); 2925 2926 case TGSI_OPCODE_LG2: 2927 return emit_scalar_op1( emit, SVGA3DOP_LOG, insn ); 2928 2929 case TGSI_OPCODE_RSQ: 2930 return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn ); 2931 2932 case TGSI_OPCODE_RCP: 2933 return emit_scalar_op1( emit, SVGA3DOP_RCP, insn ); 2934 2935 case TGSI_OPCODE_CONT: 2936 /* not expected (we return PIPE_SHADER_CAP_CONT_SUPPORTED = 0) */ 2937 return FALSE; 2938 2939 case TGSI_OPCODE_RET: 2940 /* This is a noop -- we tell mesa that we can't support RET 2941 * within a function (early return), so this will always be 2942 * followed by an ENDSUB. 
2943 */ 2944 return TRUE; 2945 2946 /* These aren't actually used by any of the frontends we care 2947 * about: 2948 */ 2949 case TGSI_OPCODE_AND: 2950 case TGSI_OPCODE_OR: 2951 case TGSI_OPCODE_I2F: 2952 case TGSI_OPCODE_NOT: 2953 case TGSI_OPCODE_SHL: 2954 case TGSI_OPCODE_ISHR: 2955 case TGSI_OPCODE_XOR: 2956 return FALSE; 2957 2958 case TGSI_OPCODE_IF: 2959 return emit_if( emit, insn ); 2960 case TGSI_OPCODE_ELSE: 2961 return emit_else( emit, insn ); 2962 case TGSI_OPCODE_ENDIF: 2963 return emit_endif( emit, insn ); 2964 2965 case TGSI_OPCODE_BGNLOOP: 2966 return emit_bgnloop( emit, insn ); 2967 case TGSI_OPCODE_ENDLOOP: 2968 return emit_endloop( emit, insn ); 2969 case TGSI_OPCODE_BRK: 2970 return emit_brk( emit, insn ); 2971 2972 case TGSI_OPCODE_KILL: 2973 return emit_discard( emit, insn ); 2974 2975 case TGSI_OPCODE_DST: 2976 return emit_dst_insn( emit, insn ); 2977 2978 case TGSI_OPCODE_LIT: 2979 return emit_lit( emit, insn ); 2980 2981 case TGSI_OPCODE_LRP: 2982 return emit_lrp( emit, insn ); 2983 2984 case TGSI_OPCODE_SSG: 2985 return emit_ssg( emit, insn ); 2986 2987 case TGSI_OPCODE_MOV: 2988 return emit_mov( emit, insn ); 2989 2990 case TGSI_OPCODE_SQRT: 2991 return emit_sqrt( emit, insn ); 2992 2993 default: 2994 { 2995 SVGA3dShaderOpCodeType opcode = 2996 translate_opcode(insn->Instruction.Opcode); 2997 2998 if (opcode == SVGA3DOP_LAST_INST) 2999 return FALSE; 3000 3001 if (!emit_simple_instruction( emit, opcode, insn )) 3002 return FALSE; 3003 } 3004 } 3005 3006 return TRUE; 3007} 3008 3009 3010/** 3011 * Translate/emit a TGSI IMMEDIATE declaration. 3012 * An immediate vector is a constant that's hard-coded into the shader. 
3013 */ 3014static boolean 3015svga_emit_immediate(struct svga_shader_emitter *emit, 3016 const struct tgsi_full_immediate *imm) 3017{ 3018 static const float id[4] = {0,0,0,1}; 3019 float value[4]; 3020 unsigned i; 3021 3022 assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5); 3023 for (i = 0; i < 4 && i < imm->Immediate.NrTokens - 1; i++) { 3024 float f = imm->u[i].Float; 3025 value[i] = util_is_inf_or_nan(f) ? 0.0f : f; 3026 } 3027 3028 /* If the immediate has less than four values, fill in the remaining 3029 * positions from id={0,0,0,1}. 3030 */ 3031 for ( ; i < 4; i++ ) 3032 value[i] = id[i]; 3033 3034 return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 3035 emit->imm_start + emit->internal_imm_count++, 3036 value[0], value[1], value[2], value[3]); 3037} 3038 3039 3040static boolean 3041make_immediate(struct svga_shader_emitter *emit, 3042 float a, float b, float c, float d, 3043 struct src_register *out ) 3044{ 3045 unsigned idx = emit->nr_hw_float_const++; 3046 3047 if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, 3048 idx, a, b, c, d )) 3049 return FALSE; 3050 3051 *out = src_register( SVGA3DREG_CONST, idx ); 3052 3053 return TRUE; 3054} 3055 3056 3057/** 3058 * Emit special VS instructions at top of shader. 3059 */ 3060static boolean 3061emit_vs_preamble(struct svga_shader_emitter *emit) 3062{ 3063 if (!emit->key.vs.need_prescale) { 3064 if (!make_immediate( emit, 0, 0, .5, .5, 3065 &emit->imm_0055)) 3066 return FALSE; 3067 } 3068 3069 return TRUE; 3070} 3071 3072 3073/** 3074 * Emit special PS instructions at top of shader. 3075 */ 3076static boolean 3077emit_ps_preamble(struct svga_shader_emitter *emit) 3078{ 3079 if (emit->ps_reads_pos && emit->info.reads_z) { 3080 /* 3081 * Assemble the position from various bits of inputs. Depth and W are 3082 * passed in a texcoord this is due to D3D's vPos not hold Z or W. 3083 * Also fixup the perspective interpolation. 
3084 * 3085 * temp_pos.xy = vPos.xy 3086 * temp_pos.w = rcp(texcoord1.w); 3087 * temp_pos.z = texcoord1.z * temp_pos.w; 3088 */ 3089 if (!submit_op1( emit, 3090 inst_token(SVGA3DOP_MOV), 3091 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_XY ), 3092 emit->ps_true_pos )) 3093 return FALSE; 3094 3095 if (!submit_op1( emit, 3096 inst_token(SVGA3DOP_RCP), 3097 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_W ), 3098 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_W ) )) 3099 return FALSE; 3100 3101 if (!submit_op2( emit, 3102 inst_token(SVGA3DOP_MUL), 3103 writemask( emit->ps_temp_pos, TGSI_WRITEMASK_Z ), 3104 scalar( emit->ps_depth_pos, TGSI_SWIZZLE_Z ), 3105 scalar( src(emit->ps_temp_pos), TGSI_SWIZZLE_W ) )) 3106 return FALSE; 3107 } 3108 3109 return TRUE; 3110} 3111 3112 3113/** 3114 * Emit special PS instructions at end of shader. 3115 */ 3116static boolean 3117emit_ps_postamble(struct svga_shader_emitter *emit) 3118{ 3119 unsigned i; 3120 3121 /* PS oDepth is incredibly fragile and it's very hard to catch the 3122 * types of usage that break it during shader emit. Easier just to 3123 * redirect the main program to a temporary and then only touch 3124 * oDepth with a hand-crafted MOV below. 3125 */ 3126 if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) { 3127 if (!submit_op1( emit, 3128 inst_token(SVGA3DOP_MOV), 3129 emit->true_pos, 3130 scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) )) 3131 return FALSE; 3132 } 3133 3134 for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { 3135 if (SVGA3dShaderGetRegType(emit->true_color_output[i].value) != 0) { 3136 /* Potentially override output colors with white for XOR 3137 * logicop workaround. 
3138 */ 3139 if (emit->unit == PIPE_SHADER_FRAGMENT && 3140 emit->key.fs.white_fragments) { 3141 struct src_register one = get_one_immediate(emit); 3142 3143 if (!submit_op1( emit, 3144 inst_token(SVGA3DOP_MOV), 3145 emit->true_color_output[i], 3146 one )) 3147 return FALSE; 3148 } 3149 else if (emit->unit == PIPE_SHADER_FRAGMENT && 3150 i < emit->key.fs.write_color0_to_n_cbufs) { 3151 /* Write temp color output [0] to true output [i] */ 3152 if (!submit_op1(emit, inst_token(SVGA3DOP_MOV), 3153 emit->true_color_output[i], 3154 src(emit->temp_color_output[0]))) { 3155 return FALSE; 3156 } 3157 } 3158 else { 3159 if (!submit_op1( emit, 3160 inst_token(SVGA3DOP_MOV), 3161 emit->true_color_output[i], 3162 src(emit->temp_color_output[i]) )) 3163 return FALSE; 3164 } 3165 } 3166 } 3167 3168 return TRUE; 3169} 3170 3171 3172/** 3173 * Emit special VS instructions at end of shader. 3174 */ 3175static boolean 3176emit_vs_postamble(struct svga_shader_emitter *emit) 3177{ 3178 /* PSIZ output is incredibly fragile and it's very hard to catch 3179 * the types of usage that break it during shader emit. Easier 3180 * just to redirect the main program to a temporary and then only 3181 * touch PSIZ with a hand-crafted MOV below. 3182 */ 3183 if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) { 3184 if (!submit_op1( emit, 3185 inst_token(SVGA3DOP_MOV), 3186 emit->true_psiz, 3187 scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) )) 3188 return FALSE; 3189 } 3190 3191 /* Need to perform various manipulations on vertex position to cope 3192 * with the different GL and D3D clip spaces. 
3193 */ 3194 if (emit->key.vs.need_prescale) { 3195 SVGA3dShaderDestToken temp_pos = emit->temp_pos; 3196 SVGA3dShaderDestToken depth = emit->depth_pos; 3197 SVGA3dShaderDestToken pos = emit->true_pos; 3198 unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1; 3199 struct src_register prescale_scale = src_register( SVGA3DREG_CONST, 3200 offset + 0 ); 3201 struct src_register prescale_trans = src_register( SVGA3DREG_CONST, 3202 offset + 1 ); 3203 3204 if (!submit_op1( emit, 3205 inst_token(SVGA3DOP_MOV), 3206 writemask(depth, TGSI_WRITEMASK_W), 3207 scalar(src(temp_pos), TGSI_SWIZZLE_W) )) 3208 return FALSE; 3209 3210 /* MUL temp_pos.xyz, temp_pos, prescale.scale 3211 * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos 3212 * --> Note that prescale.trans.w == 0 3213 */ 3214 if (!submit_op2( emit, 3215 inst_token(SVGA3DOP_MUL), 3216 writemask(temp_pos, TGSI_WRITEMASK_XYZ), 3217 src(temp_pos), 3218 prescale_scale )) 3219 return FALSE; 3220 3221 if (!submit_op3( emit, 3222 inst_token(SVGA3DOP_MAD), 3223 pos, 3224 swizzle(src(temp_pos), 3, 3, 3, 3), 3225 prescale_trans, 3226 src(temp_pos))) 3227 return FALSE; 3228 3229 /* Also write to depth value */ 3230 if (!submit_op3( emit, 3231 inst_token(SVGA3DOP_MAD), 3232 writemask(depth, TGSI_WRITEMASK_Z), 3233 swizzle(src(temp_pos), 3, 3, 3, 3), 3234 prescale_trans, 3235 src(temp_pos) )) 3236 return FALSE; 3237 } 3238 else { 3239 SVGA3dShaderDestToken temp_pos = emit->temp_pos; 3240 SVGA3dShaderDestToken depth = emit->depth_pos; 3241 SVGA3dShaderDestToken pos = emit->true_pos; 3242 struct src_register imm_0055 = emit->imm_0055; 3243 3244 /* Adjust GL clipping coordinate space to hardware (D3D-style): 3245 * 3246 * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos 3247 * MOV result.position, temp_pos 3248 */ 3249 if (!submit_op2( emit, 3250 inst_token(SVGA3DOP_DP4), 3251 writemask(temp_pos, TGSI_WRITEMASK_Z), 3252 imm_0055, 3253 src(temp_pos) )) 3254 return FALSE; 3255 3256 if (!submit_op1( emit, 3257 
inst_token(SVGA3DOP_MOV), 3258 pos, 3259 src(temp_pos) )) 3260 return FALSE; 3261 3262 /* Move the manipulated depth into the extra texcoord reg */ 3263 if (!submit_op1( emit, 3264 inst_token(SVGA3DOP_MOV), 3265 writemask(depth, TGSI_WRITEMASK_ZW), 3266 src(temp_pos) )) 3267 return FALSE; 3268 } 3269 3270 return TRUE; 3271} 3272 3273 3274/** 3275 * For the pixel shader: emit the code which chooses the front 3276 * or back face color depending on triangle orientation. 3277 * This happens at the top of the fragment shader. 3278 * 3279 * 0: IF VFACE :4 3280 * 1: COLOR = FrontColor; 3281 * 2: ELSE 3282 * 3: COLOR = BackColor; 3283 * 4: ENDIF 3284 */ 3285static boolean 3286emit_light_twoside(struct svga_shader_emitter *emit) 3287{ 3288 struct src_register vface, zero; 3289 struct src_register front[2]; 3290 struct src_register back[2]; 3291 SVGA3dShaderDestToken color[2]; 3292 int count = emit->internal_color_count; 3293 unsigned i; 3294 SVGA3dShaderInstToken if_token; 3295 3296 if (count == 0) 3297 return TRUE; 3298 3299 vface = get_vface( emit ); 3300 zero = get_zero_immediate(emit); 3301 3302 /* Can't use get_temp() to allocate the color reg as such 3303 * temporaries will be reclaimed after each instruction by the call 3304 * to reset_temp_regs(). 
3305 */ 3306 for (i = 0; i < count; i++) { 3307 color[i] = dst_register( SVGA3DREG_TEMP, emit->nr_hw_temp++ ); 3308 front[i] = emit->input_map[emit->internal_color_idx[i]]; 3309 3310 /* Back is always the next input: 3311 */ 3312 back[i] = front[i]; 3313 back[i].base.num = front[i].base.num + 1; 3314 3315 /* Reassign the input_map to the actual front-face color: 3316 */ 3317 emit->input_map[emit->internal_color_idx[i]] = src(color[i]); 3318 } 3319 3320 if_token = inst_token( SVGA3DOP_IFC ); 3321 3322 if (emit->key.fs.front_ccw) 3323 if_token.control = SVGA3DOPCOMP_LT; 3324 else 3325 if_token.control = SVGA3DOPCOMP_GT; 3326 3327 if (!(emit_instruction( emit, if_token ) && 3328 emit_src( emit, vface ) && 3329 emit_src( emit, zero ) )) 3330 return FALSE; 3331 3332 for (i = 0; i < count; i++) { 3333 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] )) 3334 return FALSE; 3335 } 3336 3337 if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE)))) 3338 return FALSE; 3339 3340 for (i = 0; i < count; i++) { 3341 if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] )) 3342 return FALSE; 3343 } 3344 3345 if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) )) 3346 return FALSE; 3347 3348 return TRUE; 3349} 3350 3351 3352/** 3353 * Emit special setup code for the front/back face register in the FS. 3354 * 0: SETP_GT TEMP, VFACE, 0 3355 * where TEMP is a fake frontface register 3356 */ 3357static boolean 3358emit_frontface(struct svga_shader_emitter *emit) 3359{ 3360 struct src_register vface; 3361 SVGA3dShaderDestToken temp; 3362 struct src_register pass, fail; 3363 3364 vface = get_vface( emit ); 3365 3366 /* Can't use get_temp() to allocate the fake frontface reg as such 3367 * temporaries will be reclaimed after each instruction by the call 3368 * to reset_temp_regs(). 
3369 */ 3370 temp = dst_register( SVGA3DREG_TEMP, 3371 emit->nr_hw_temp++ ); 3372 3373 if (emit->key.fs.front_ccw) { 3374 pass = get_zero_immediate(emit); 3375 fail = get_one_immediate(emit); 3376 } else { 3377 pass = get_one_immediate(emit); 3378 fail = get_zero_immediate(emit); 3379 } 3380 3381 if (!emit_conditional(emit, PIPE_FUNC_GREATER, 3382 temp, vface, get_zero_immediate(emit), 3383 pass, fail)) 3384 return FALSE; 3385 3386 /* Reassign the input_map to the actual front-face color: 3387 */ 3388 emit->input_map[emit->internal_frontface_idx] = src(temp); 3389 3390 return TRUE; 3391} 3392 3393 3394/** 3395 * Emit code to invert the T component of the incoming texture coordinate. 3396 * This is used for drawing point sprites when 3397 * pipe_rasterizer_state::sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT. 3398 */ 3399static boolean 3400emit_inverted_texcoords(struct svga_shader_emitter *emit) 3401{ 3402 unsigned inverted_texcoords = emit->inverted_texcoords; 3403 3404 while (inverted_texcoords) { 3405 const unsigned unit = ffs(inverted_texcoords) - 1; 3406 3407 assert(emit->inverted_texcoords & (1 << unit)); 3408 3409 assert(unit < ARRAY_SIZE(emit->ps_true_texcoord)); 3410 3411 assert(unit < ARRAY_SIZE(emit->ps_inverted_texcoord_input)); 3412 3413 assert(emit->ps_inverted_texcoord_input[unit] 3414 < ARRAY_SIZE(emit->input_map)); 3415 3416 /* inverted = coord * (1, -1, 1, 1) + (0, 1, 0, 0) */ 3417 if (!submit_op3(emit, 3418 inst_token(SVGA3DOP_MAD), 3419 dst(emit->ps_inverted_texcoord[unit]), 3420 emit->ps_true_texcoord[unit], 3421 get_immediate(emit, 1.0f, -1.0f, 1.0f, 1.0f), 3422 get_immediate(emit, 0.0f, 1.0f, 0.0f, 0.0f))) 3423 return FALSE; 3424 3425 /* Reassign the input_map entry to the new texcoord register */ 3426 emit->input_map[emit->ps_inverted_texcoord_input[unit]] = 3427 emit->ps_inverted_texcoord[unit]; 3428 3429 inverted_texcoords &= ~(1 << unit); 3430 } 3431 3432 return TRUE; 3433} 3434 3435 3436/** 3437 * Emit code to adjust vertex shader 
inputs/attributes: 3438 * - Change range from [0,1] to [-1,1] (for normalized byte/short attribs). 3439 * - Set attrib W component = 1. 3440 */ 3441static boolean 3442emit_adjusted_vertex_attribs(struct svga_shader_emitter *emit) 3443{ 3444 unsigned adjust_mask = (emit->key.vs.adjust_attrib_range | 3445 emit->key.vs.adjust_attrib_w_1); 3446 3447 while (adjust_mask) { 3448 /* Adjust vertex attrib range and/or set W component = 1 */ 3449 const unsigned index = u_bit_scan(&adjust_mask); 3450 struct src_register tmp; 3451 3452 /* allocate a temp reg */ 3453 tmp = src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); 3454 emit->nr_hw_temp++; 3455 3456 if (emit->key.vs.adjust_attrib_range & (1 << index)) { 3457 /* The vertex input/attribute is supposed to be a signed value in 3458 * the range [-1,1] but we actually fetched/converted it to the 3459 * range [0,1]. This most likely happens when the app specifies a 3460 * signed byte attribute but we interpreted it as unsigned bytes. 3461 * See also svga_translate_vertex_format(). 3462 * 3463 * Here, we emit some extra instructions to adjust 3464 * the attribute values from [0,1] to [-1,1]. 3465 * 3466 * The adjustment we implement is: 3467 * new_attrib = attrib * 2.0; 3468 * if (attrib >= 0.5) 3469 * new_attrib = new_attrib - 2.0; 3470 * This isn't exactly right (it's off by a bit or so) but close enough. 
3471 */ 3472 SVGA3dShaderDestToken pred_reg = dst_register(SVGA3DREG_PREDICATE, 0); 3473 3474 /* tmp = attrib * 2.0 */ 3475 if (!submit_op2(emit, 3476 inst_token(SVGA3DOP_MUL), 3477 dst(tmp), 3478 emit->input_map[index], 3479 get_two_immediate(emit))) 3480 return FALSE; 3481 3482 /* pred = (attrib >= 0.5) */ 3483 if (!submit_op2(emit, 3484 inst_token_setp(SVGA3DOPCOMP_GE), 3485 pred_reg, 3486 emit->input_map[index], /* vert attrib */ 3487 get_half_immediate(emit))) /* 0.5 */ 3488 return FALSE; 3489 3490 /* sub(pred) tmp, tmp, 2.0 */ 3491 if (!submit_op3(emit, 3492 inst_token_predicated(SVGA3DOP_SUB), 3493 dst(tmp), 3494 src(pred_reg), 3495 tmp, 3496 get_two_immediate(emit))) 3497 return FALSE; 3498 } 3499 else { 3500 /* just copy the vertex input attrib to the temp register */ 3501 if (!submit_op1(emit, 3502 inst_token(SVGA3DOP_MOV), 3503 dst(tmp), 3504 emit->input_map[index])) 3505 return FALSE; 3506 } 3507 3508 if (emit->key.vs.adjust_attrib_w_1 & (1 << index)) { 3509 /* move 1 into W position of tmp */ 3510 if (!submit_op1(emit, 3511 inst_token(SVGA3DOP_MOV), 3512 writemask(dst(tmp), TGSI_WRITEMASK_W), 3513 get_one_immediate(emit))) 3514 return FALSE; 3515 } 3516 3517 /* Reassign the input_map entry to the new tmp register */ 3518 emit->input_map[index] = tmp; 3519 } 3520 3521 return TRUE; 3522} 3523 3524 3525/** 3526 * Determine if we need to create the "common" immediate value which is 3527 * used for generating useful vector constants such as {0,0,0,0} and 3528 * {1,1,1,1}. 3529 * We could just do this all the time except that we want to conserve 3530 * registers whenever possible. 
3531 */ 3532static boolean 3533needs_to_create_common_immediate(const struct svga_shader_emitter *emit) 3534{ 3535 unsigned i; 3536 3537 if (emit->unit == PIPE_SHADER_FRAGMENT) { 3538 if (emit->key.fs.light_twoside) 3539 return TRUE; 3540 3541 if (emit->key.fs.white_fragments) 3542 return TRUE; 3543 3544 if (emit->emit_frontface) 3545 return TRUE; 3546 3547 if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 || 3548 emit->info.opcode_count[TGSI_OPCODE_SSG] >= 1 || 3549 emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1) 3550 return TRUE; 3551 3552 if (emit->inverted_texcoords) 3553 return TRUE; 3554 3555 /* look for any PIPE_SWIZZLE_0/ONE terms */ 3556 for (i = 0; i < emit->key.num_textures; i++) { 3557 if (emit->key.tex[i].swizzle_r > PIPE_SWIZZLE_W || 3558 emit->key.tex[i].swizzle_g > PIPE_SWIZZLE_W || 3559 emit->key.tex[i].swizzle_b > PIPE_SWIZZLE_W || 3560 emit->key.tex[i].swizzle_a > PIPE_SWIZZLE_W) 3561 return TRUE; 3562 } 3563 3564 for (i = 0; i < emit->key.num_textures; i++) { 3565 if (emit->key.tex[i].compare_mode 3566 == PIPE_TEX_COMPARE_R_TO_TEXTURE) 3567 return TRUE; 3568 } 3569 } 3570 else if (emit->unit == PIPE_SHADER_VERTEX) { 3571 if (emit->info.opcode_count[TGSI_OPCODE_CMP] >= 1) 3572 return TRUE; 3573 if (emit->key.vs.adjust_attrib_range || 3574 emit->key.vs.adjust_attrib_w_1) 3575 return TRUE; 3576 } 3577 3578 if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 || 3579 emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1 || 3580 emit->info.opcode_count[TGSI_OPCODE_DDX] >= 1 || 3581 emit->info.opcode_count[TGSI_OPCODE_DDY] >= 1 || 3582 emit->info.opcode_count[TGSI_OPCODE_ROUND] >= 1 || 3583 emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 || 3584 emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 || 3585 emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 || 3586 emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 || 3587 emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 || 3588 emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 || 3589 emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 || 
3590 emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 || 3591 emit->info.opcode_count[TGSI_OPCODE_KILL] >= 1 || 3592 emit->info.opcode_count[TGSI_OPCODE_SQRT] >= 1) 3593 return TRUE; 3594 3595 return FALSE; 3596} 3597 3598 3599/** 3600 * Do we need to create a looping constant? 3601 */ 3602static boolean 3603needs_to_create_loop_const(const struct svga_shader_emitter *emit) 3604{ 3605 return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1); 3606} 3607 3608 3609static boolean 3610needs_to_create_arl_consts(const struct svga_shader_emitter *emit) 3611{ 3612 return (emit->num_arl_consts > 0); 3613} 3614 3615 3616static boolean 3617pre_parse_add_indirect( struct svga_shader_emitter *emit, 3618 int num, int current_arl) 3619{ 3620 unsigned i; 3621 assert(num < 0); 3622 3623 for (i = 0; i < emit->num_arl_consts; ++i) { 3624 if (emit->arl_consts[i].arl_num == current_arl) 3625 break; 3626 } 3627 /* new entry */ 3628 if (emit->num_arl_consts == i) { 3629 ++emit->num_arl_consts; 3630 } 3631 emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ? 
3632 num : 3633 emit->arl_consts[i].number; 3634 emit->arl_consts[i].arl_num = current_arl; 3635 return TRUE; 3636} 3637 3638 3639static boolean 3640pre_parse_instruction( struct svga_shader_emitter *emit, 3641 const struct tgsi_full_instruction *insn, 3642 int current_arl) 3643{ 3644 if (insn->Src[0].Register.Indirect && 3645 insn->Src[0].Indirect.File == TGSI_FILE_ADDRESS) { 3646 const struct tgsi_full_src_register *reg = &insn->Src[0]; 3647 if (reg->Register.Index < 0) { 3648 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3649 } 3650 } 3651 3652 if (insn->Src[1].Register.Indirect && 3653 insn->Src[1].Indirect.File == TGSI_FILE_ADDRESS) { 3654 const struct tgsi_full_src_register *reg = &insn->Src[1]; 3655 if (reg->Register.Index < 0) { 3656 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3657 } 3658 } 3659 3660 if (insn->Src[2].Register.Indirect && 3661 insn->Src[2].Indirect.File == TGSI_FILE_ADDRESS) { 3662 const struct tgsi_full_src_register *reg = &insn->Src[2]; 3663 if (reg->Register.Index < 0) { 3664 pre_parse_add_indirect(emit, reg->Register.Index, current_arl); 3665 } 3666 } 3667 3668 return TRUE; 3669} 3670 3671 3672static boolean 3673pre_parse_tokens( struct svga_shader_emitter *emit, 3674 const struct tgsi_token *tokens ) 3675{ 3676 struct tgsi_parse_context parse; 3677 int current_arl = 0; 3678 3679 tgsi_parse_init( &parse, tokens ); 3680 3681 while (!tgsi_parse_end_of_tokens( &parse )) { 3682 tgsi_parse_token( &parse ); 3683 switch (parse.FullToken.Token.Type) { 3684 case TGSI_TOKEN_TYPE_IMMEDIATE: 3685 case TGSI_TOKEN_TYPE_DECLARATION: 3686 break; 3687 case TGSI_TOKEN_TYPE_INSTRUCTION: 3688 if (parse.FullToken.FullInstruction.Instruction.Opcode == 3689 TGSI_OPCODE_ARL) { 3690 ++current_arl; 3691 } 3692 if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction, 3693 current_arl )) 3694 return FALSE; 3695 break; 3696 default: 3697 break; 3698 } 3699 3700 } 3701 return TRUE; 3702} 3703 3704 3705static boolean 
3706svga_shader_emit_helpers(struct svga_shader_emitter *emit) 3707{ 3708 if (needs_to_create_common_immediate( emit )) { 3709 create_common_immediate( emit ); 3710 } 3711 if (needs_to_create_loop_const( emit )) { 3712 create_loop_const( emit ); 3713 } 3714 if (needs_to_create_arl_consts( emit )) { 3715 create_arl_consts( emit ); 3716 } 3717 3718 if (emit->unit == PIPE_SHADER_FRAGMENT) { 3719 if (!svga_shader_emit_samplers_decl( emit )) 3720 return FALSE; 3721 3722 if (!emit_ps_preamble( emit )) 3723 return FALSE; 3724 3725 if (emit->key.fs.light_twoside) { 3726 if (!emit_light_twoside( emit )) 3727 return FALSE; 3728 } 3729 if (emit->emit_frontface) { 3730 if (!emit_frontface( emit )) 3731 return FALSE; 3732 } 3733 if (emit->inverted_texcoords) { 3734 if (!emit_inverted_texcoords( emit )) 3735 return FALSE; 3736 } 3737 } 3738 else { 3739 assert(emit->unit == PIPE_SHADER_VERTEX); 3740 if (emit->key.vs.adjust_attrib_range) { 3741 if (!emit_adjusted_vertex_attribs(emit) || 3742 emit->key.vs.adjust_attrib_w_1) { 3743 return FALSE; 3744 } 3745 } 3746 } 3747 3748 return TRUE; 3749} 3750 3751 3752/** 3753 * This is the main entrypoint into the TGSI instruction translater. 3754 * Translate TGSI shader tokens into an SVGA shader. 
3755 */ 3756boolean 3757svga_shader_emit_instructions(struct svga_shader_emitter *emit, 3758 const struct tgsi_token *tokens) 3759{ 3760 struct tgsi_parse_context parse; 3761 const struct tgsi_token *new_tokens = NULL; 3762 boolean ret = TRUE; 3763 boolean helpers_emitted = FALSE; 3764 unsigned line_nr = 0; 3765 3766 if (emit->unit == PIPE_SHADER_FRAGMENT && emit->key.fs.pstipple) { 3767 unsigned unit; 3768 3769 new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0, 3770 TGSI_FILE_INPUT); 3771 3772 if (new_tokens) { 3773 /* Setup texture state for stipple */ 3774 emit->sampler_target[unit] = TGSI_TEXTURE_2D; 3775 emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X; 3776 emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y; 3777 emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z; 3778 emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W; 3779 3780 emit->pstipple_sampler_unit = unit; 3781 3782 tokens = new_tokens; 3783 } 3784 } 3785 3786 tgsi_parse_init( &parse, tokens ); 3787 emit->internal_imm_count = 0; 3788 3789 if (emit->unit == PIPE_SHADER_VERTEX) { 3790 ret = emit_vs_preamble( emit ); 3791 if (!ret) 3792 goto done; 3793 } 3794 3795 pre_parse_tokens(emit, tokens); 3796 3797 while (!tgsi_parse_end_of_tokens( &parse )) { 3798 tgsi_parse_token( &parse ); 3799 3800 switch (parse.FullToken.Token.Type) { 3801 case TGSI_TOKEN_TYPE_IMMEDIATE: 3802 ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate ); 3803 if (!ret) 3804 goto done; 3805 break; 3806 3807 case TGSI_TOKEN_TYPE_DECLARATION: 3808 ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration ); 3809 if (!ret) 3810 goto done; 3811 break; 3812 3813 case TGSI_TOKEN_TYPE_INSTRUCTION: 3814 if (!helpers_emitted) { 3815 if (!svga_shader_emit_helpers( emit )) 3816 goto done; 3817 helpers_emitted = TRUE; 3818 } 3819 ret = svga_emit_instruction( emit, 3820 line_nr++, 3821 &parse.FullToken.FullInstruction ); 3822 if (!ret) 3823 goto done; 3824 break; 3825 default: 3826 break; 3827 } 3828 3829 
reset_temp_regs( emit ); 3830 } 3831 3832 /* Need to terminate the current subroutine. Note that the 3833 * hardware doesn't tolerate shaders without sub-routines 3834 * terminating with RET+END. 3835 */ 3836 if (!emit->in_main_func) { 3837 ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) ); 3838 if (!ret) 3839 goto done; 3840 } 3841 3842 assert(emit->dynamic_branching_level == 0); 3843 3844 /* Need to terminate the whole shader: 3845 */ 3846 ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) ); 3847 if (!ret) 3848 goto done; 3849 3850done: 3851 tgsi_parse_free( &parse ); 3852 if (new_tokens) { 3853 tgsi_free_tokens(new_tokens); 3854 } 3855 3856 return ret; 3857} 3858