1/************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include <stdarg.h> 29 30#include "i915_context.h" 31#include "i915_debug.h" 32#include "i915_debug_private.h" 33#include "i915_fpc.h" 34#include "i915_reg.h" 35 36#include "pipe/p_shader_tokens.h" 37#include "tgsi/tgsi_dump.h" 38#include "tgsi/tgsi_from_mesa.h" 39#include "tgsi/tgsi_info.h" 40#include "tgsi/tgsi_parse.h" 41#include "util/log.h" 42#include "util/u_math.h" 43#include "util/u_memory.h" 44#include "util/u_string.h" 45 46#include "draw/draw_vertex.h" 47 48#ifndef M_PI 49#define M_PI 3.14159265358979323846 50#endif 51 52/** 53 * Simple pass-through fragment shader to use when we don't have 54 * a real shader (or it fails to compile for some reason). 55 */ 56static unsigned passthrough_program[] = { 57 _3DSTATE_PIXEL_SHADER_PROGRAM | ((1 * 3) - 1), 58 /* move to output color: 59 */ 60 (A0_MOV | (REG_TYPE_OC << A0_DEST_TYPE_SHIFT) | A0_DEST_CHANNEL_ALL | 61 (REG_TYPE_R << A0_SRC0_TYPE_SHIFT) | (0 << A0_SRC0_NR_SHIFT)), 62 ((SRC_ONE << A1_SRC0_CHANNEL_X_SHIFT) | 63 (SRC_ZERO << A1_SRC0_CHANNEL_Y_SHIFT) | 64 (SRC_ZERO << A1_SRC0_CHANNEL_Z_SHIFT) | 65 (SRC_ONE << A1_SRC0_CHANNEL_W_SHIFT)), 66 0}; 67 68/** 69 * component-wise negation of ureg 70 */ 71static inline int 72negate(int reg, int x, int y, int z, int w) 73{ 74 /* Another neat thing about the UREG representation */ 75 return reg ^ (((x & 1) << UREG_CHANNEL_X_NEGATE_SHIFT) | 76 ((y & 1) << UREG_CHANNEL_Y_NEGATE_SHIFT) | 77 ((z & 1) << UREG_CHANNEL_Z_NEGATE_SHIFT) | 78 ((w & 1) << UREG_CHANNEL_W_NEGATE_SHIFT)); 79} 80 81/** 82 * In the event of a translation failure, we'll generate a simple color 83 * pass-through program. 84 */ 85static void 86i915_use_passthrough_shader(struct i915_fragment_shader *fs) 87{ 88 fs->program = (uint32_t *)MALLOC(sizeof(passthrough_program)); 89 if (fs->program) { 90 memcpy(fs->program, passthrough_program, sizeof(passthrough_program)); 91 fs->program_len = ARRAY_SIZE(passthrough_program); 92 } 93 fs->num_constants = 0; 94} 95 96void 97i915_program_error(struct i915_fp_compile *p, const char *msg, ...) 98{ 99 if (p->log_program_errors) { 100 va_list args; 101 102 va_start(args, msg); 103 mesa_loge_v(msg, args); 104 va_end(args); 105 } 106 107 p->error = 1; 108} 109 110static uint32_t 111get_mapping(struct i915_fragment_shader *fs, enum tgsi_semantic semantic, 112 int index) 113{ 114 int i; 115 for (i = 0; i < I915_TEX_UNITS; i++) { 116 if (fs->texcoords[i].semantic == -1) { 117 fs->texcoords[i].semantic = semantic; 118 fs->texcoords[i].index = index; 119 return i; 120 } 121 if (fs->texcoords[i].semantic == semantic && 122 fs->texcoords[i].index == index) 123 return i; 124 } 125 debug_printf("Exceeded max generics\n"); 126 return 0; 127} 128 129/** 130 * Construct a ureg for the given source register. Will emit 131 * constants, apply swizzling and negation as needed. 132 */ 133static uint32_t 134src_vector(struct i915_fp_compile *p, 135 const struct i915_full_src_register *source, 136 struct i915_fragment_shader *fs) 137{ 138 uint32_t index = source->Register.Index; 139 uint32_t src = 0, sem_name, sem_ind; 140 141 switch (source->Register.File) { 142 case TGSI_FILE_TEMPORARY: 143 if (source->Register.Index >= I915_MAX_TEMPORARY) { 144 i915_program_error(p, "Exceeded max temporary reg"); 145 return 0; 146 } 147 src = UREG(REG_TYPE_R, index); 148 break; 149 case TGSI_FILE_INPUT: 150 /* XXX: Packing COL1, FOGC into a single attribute works for 151 * texenv programs, but will fail for real fragment programs 152 * that use these attributes and expect them to be a full 4 153 * components wide. Could use a texcoord to pass these 154 * attributes if necessary, but that won't work in the general 155 * case. 156 * 157 * We also use a texture coordinate to pass wpos when possible. 158 */ 159 160 sem_name = p->shader->info.input_semantic_name[index]; 161 sem_ind = p->shader->info.input_semantic_index[index]; 162 163 switch (sem_name) { 164 case TGSI_SEMANTIC_GENERIC: 165 case TGSI_SEMANTIC_TEXCOORD: 166 case TGSI_SEMANTIC_PCOORD: 167 case TGSI_SEMANTIC_POSITION: { 168 if (sem_name == TGSI_SEMANTIC_PCOORD) 169 fs->reads_pntc = true; 170 171 int real_tex_unit = get_mapping(fs, sem_name, sem_ind); 172 src = i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, 173 D0_CHANNEL_ALL); 174 break; 175 } 176 case TGSI_SEMANTIC_COLOR: 177 if (sem_ind == 0) { 178 src = i915_emit_decl(p, REG_TYPE_T, T_DIFFUSE, D0_CHANNEL_ALL); 179 } else { 180 /* secondary color */ 181 assert(sem_ind == 1); 182 src = i915_emit_decl(p, REG_TYPE_T, T_SPECULAR, D0_CHANNEL_XYZ); 183 src = swizzle(src, X, Y, Z, ONE); 184 } 185 break; 186 case TGSI_SEMANTIC_FOG: 187 src = i915_emit_decl(p, REG_TYPE_T, T_FOG_W, D0_CHANNEL_W); 188 src = swizzle(src, W, W, W, W); 189 break; 190 case TGSI_SEMANTIC_FACE: { 191 /* for back/front faces */ 192 int real_tex_unit = get_mapping(fs, sem_name, sem_ind); 193 src = 194 i915_emit_decl(p, REG_TYPE_T, T_TEX0 + real_tex_unit, D0_CHANNEL_X); 195 break; 196 } 197 default: 198 i915_program_error(p, "Bad source->Index"); 199 return 0; 200 } 201 break; 202 203 case TGSI_FILE_IMMEDIATE: { 204 assert(index < p->num_immediates); 205 206 uint8_t swiz[4] = { 207 source->Register.SwizzleX, 208 source->Register.SwizzleY, 209 source->Register.SwizzleZ, 210 source->Register.SwizzleW 211 }; 212 213 uint8_t neg[4] = { 214 source->Register.Negate, 215 source->Register.Negate, 216 source->Register.Negate, 217 source->Register.Negate 218 }; 219 220 unsigned i; 221 222 for (i = 0; i < 4; i++) { 223 if (swiz[i] == TGSI_SWIZZLE_ZERO || swiz[i] == TGSI_SWIZZLE_ONE) { 224 continue; 225 } else if (p->immediates[index][swiz[i]] == 0.0) { 226 swiz[i] = TGSI_SWIZZLE_ZERO; 227 } else if (p->immediates[index][swiz[i]] == 1.0) { 228 swiz[i] = TGSI_SWIZZLE_ONE; 229 } else if (p->immediates[index][swiz[i]] == -1.0) { 230 swiz[i] = TGSI_SWIZZLE_ONE; 231 neg[i] ^= 1; 232 } else { 233 break; 234 } 235 } 236 237 if (i == 4) { 238 return negate(swizzle(UREG(REG_TYPE_R, 0), 239 swiz[0], swiz[1], swiz[2], swiz[3]), 240 neg[0], neg[1], neg[2], neg[3]); 241 } 242 243 index = p->immediates_map[index]; 244 FALLTHROUGH; 245 } 246 247 case TGSI_FILE_CONSTANT: 248 src = UREG(REG_TYPE_CONST, index); 249 break; 250 251 default: 252 i915_program_error(p, "Bad source->File"); 253 return 0; 254 } 255 256 src = swizzle(src, source->Register.SwizzleX, source->Register.SwizzleY, 257 source->Register.SwizzleZ, source->Register.SwizzleW); 258 259 /* No HW abs flag, so we have to max with the negation. */ 260 if (source->Register.Absolute) { 261 uint32_t tmp = i915_get_utemp(p); 262 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src, 263 negate(src, 1, 1, 1, 1), 0); 264 src = tmp; 265 } 266 267 /* There's both negate-all-components and per-component negation. 268 * Try to handle both here. 269 */ 270 { 271 int n = source->Register.Negate; 272 src = negate(src, n, n, n, n); 273 } 274 275 return src; 276} 277 278/** 279 * Construct a ureg for a destination register. 280 */ 281static uint32_t 282get_result_vector(struct i915_fp_compile *p, 283 const struct i915_full_dst_register *dest) 284{ 285 switch (dest->Register.File) { 286 case TGSI_FILE_OUTPUT: { 287 uint32_t sem_name = 288 p->shader->info.output_semantic_name[dest->Register.Index]; 289 switch (sem_name) { 290 case TGSI_SEMANTIC_POSITION: 291 return UREG(REG_TYPE_OD, 0); 292 case TGSI_SEMANTIC_COLOR: 293 return UREG(REG_TYPE_OC, 0); 294 default: 295 i915_program_error(p, "Bad inst->DstReg.Index/semantics"); 296 return 0; 297 } 298 } 299 case TGSI_FILE_TEMPORARY: 300 return UREG(REG_TYPE_R, dest->Register.Index); 301 default: 302 i915_program_error(p, "Bad inst->DstReg.File"); 303 return 0; 304 } 305} 306 307/** 308 * Compute flags for saturation and writemask. 309 */ 310static uint32_t 311get_result_flags(const struct i915_full_instruction *inst) 312{ 313 const uint32_t writeMask = inst->Dst[0].Register.WriteMask; 314 uint32_t flags = 0x0; 315 316 if (inst->Instruction.Saturate) 317 flags |= A0_DEST_SATURATE; 318 319 if (writeMask & TGSI_WRITEMASK_X) 320 flags |= A0_DEST_CHANNEL_X; 321 if (writeMask & TGSI_WRITEMASK_Y) 322 flags |= A0_DEST_CHANNEL_Y; 323 if (writeMask & TGSI_WRITEMASK_Z) 324 flags |= A0_DEST_CHANNEL_Z; 325 if (writeMask & TGSI_WRITEMASK_W) 326 flags |= A0_DEST_CHANNEL_W; 327 328 return flags; 329} 330 331/** 332 * Convert TGSI_TEXTURE_x token to DO_SAMPLE_TYPE_x token 333 */ 334static uint32_t 335translate_tex_src_target(struct i915_fp_compile *p, uint32_t tex) 336{ 337 switch (tex) { 338 case TGSI_TEXTURE_SHADOW1D: 339 FALLTHROUGH; 340 case TGSI_TEXTURE_1D: 341 return D0_SAMPLE_TYPE_2D; 342 343 case TGSI_TEXTURE_SHADOW2D: 344 FALLTHROUGH; 345 case TGSI_TEXTURE_2D: 346 return D0_SAMPLE_TYPE_2D; 347 348 case TGSI_TEXTURE_SHADOWRECT: 349 FALLTHROUGH; 350 case TGSI_TEXTURE_RECT: 351 return D0_SAMPLE_TYPE_2D; 352 353 case TGSI_TEXTURE_3D: 354 return D0_SAMPLE_TYPE_VOLUME; 355 356 case TGSI_TEXTURE_CUBE: 357 return D0_SAMPLE_TYPE_CUBE; 358 359 default: 360 i915_program_error(p, "TexSrc type"); 361 return 0; 362 } 363} 364 365/** 366 * Return the number of coords needed to access a given TGSI_TEXTURE_* 367 */ 368uint32_t 369i915_coord_mask(enum tgsi_opcode opcode, enum tgsi_texture_type tex) 370{ 371 uint32_t coord_mask = 0; 372 373 if (opcode == TGSI_OPCODE_TXP || opcode == TGSI_OPCODE_TXB) 374 coord_mask |= TGSI_WRITEMASK_W; 375 376 switch (tex) { 377 case TGSI_TEXTURE_1D: /* See the 1D coord swizzle below. */ 378 case TGSI_TEXTURE_2D: 379 case TGSI_TEXTURE_RECT: 380 return coord_mask | TGSI_WRITEMASK_XY; 381 382 case TGSI_TEXTURE_SHADOW1D: 383 case TGSI_TEXTURE_SHADOW2D: 384 case TGSI_TEXTURE_SHADOWRECT: 385 case TGSI_TEXTURE_3D: 386 case TGSI_TEXTURE_CUBE: 387 return coord_mask | TGSI_WRITEMASK_XYZ; 388 389 default: 390 unreachable("bad texture target"); 391 } 392} 393 394/** 395 * Generate texel lookup instruction. 396 */ 397static void 398emit_tex(struct i915_fp_compile *p, const struct i915_full_instruction *inst, 399 uint32_t opcode, struct i915_fragment_shader *fs) 400{ 401 uint32_t texture = inst->Texture.Texture; 402 uint32_t unit = inst->Src[1].Register.Index; 403 uint32_t tex = translate_tex_src_target(p, texture); 404 uint32_t sampler = i915_emit_decl(p, REG_TYPE_S, unit, tex); 405 uint32_t coord = src_vector(p, &inst->Src[0], fs); 406 407 /* For 1D textures, set the Y coord to the same as X. Otherwise, we could 408 * select the wrong LOD based on the uninitialized Y coord when we sample our 409 * 1D textures as 2D. 410 */ 411 if (texture == TGSI_TEXTURE_1D || texture == TGSI_TEXTURE_SHADOW1D) 412 coord = swizzle(coord, X, X, Z, W); 413 414 i915_emit_texld(p, get_result_vector(p, &inst->Dst[0]), 415 get_result_flags(inst), sampler, coord, opcode, 416 i915_coord_mask(inst->Instruction.Opcode, texture)); 417} 418 419/** 420 * Generate a simple arithmetic instruction 421 * \param opcode the i915 opcode 422 * \param numArgs the number of input/src arguments 423 */ 424static void 425emit_simple_arith(struct i915_fp_compile *p, 426 const struct i915_full_instruction *inst, uint32_t opcode, 427 uint32_t numArgs, struct i915_fragment_shader *fs) 428{ 429 uint32_t arg1, arg2, arg3; 430 431 assert(numArgs <= 3); 432 433 arg1 = (numArgs < 1) ? 0 : src_vector(p, &inst->Src[0], fs); 434 arg2 = (numArgs < 2) ? 0 : src_vector(p, &inst->Src[1], fs); 435 arg3 = (numArgs < 3) ? 0 : src_vector(p, &inst->Src[2], fs); 436 437 i915_emit_arith(p, opcode, get_result_vector(p, &inst->Dst[0]), 438 get_result_flags(inst), 0, arg1, arg2, arg3); 439} 440 441/** As above, but swap the first two src regs */ 442static void 443emit_simple_arith_swap2(struct i915_fp_compile *p, 444 const struct i915_full_instruction *inst, 445 uint32_t opcode, uint32_t numArgs, 446 struct i915_fragment_shader *fs) 447{ 448 struct i915_full_instruction inst2; 449 450 assert(numArgs == 2); 451 452 /* transpose first two registers */ 453 inst2 = *inst; 454 inst2.Src[0] = inst->Src[1]; 455 inst2.Src[1] = inst->Src[0]; 456 457 emit_simple_arith(p, &inst2, opcode, numArgs, fs); 458} 459 460/* 461 * Translate TGSI instruction to i915 instruction. 462 * 463 * Possible concerns: 464 * 465 * DDX, DDY -- return 0 466 * SIN, COS -- could use another taylor step? 467 * LIT -- results seem a little different to sw mesa 468 * LOG -- different to mesa on negative numbers, but this is conformant. 469 */ 470static void 471i915_translate_instruction(struct i915_fp_compile *p, 472 const struct i915_full_instruction *inst, 473 struct i915_fragment_shader *fs) 474{ 475 uint32_t src0, src1, src2, flags; 476 uint32_t tmp = 0; 477 478 switch (inst->Instruction.Opcode) { 479 case TGSI_OPCODE_ADD: 480 emit_simple_arith(p, inst, A0_ADD, 2, fs); 481 break; 482 483 case TGSI_OPCODE_CEIL: 484 src0 = src_vector(p, &inst->Src[0], fs); 485 tmp = i915_get_utemp(p); 486 flags = get_result_flags(inst); 487 i915_emit_arith(p, A0_FLR, tmp, flags & A0_DEST_CHANNEL_ALL, 0, 488 negate(src0, 1, 1, 1, 1), 0, 0); 489 i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), flags, 0, 490 negate(tmp, 1, 1, 1, 1), 0, 0); 491 break; 492 493 case TGSI_OPCODE_CMP: 494 src0 = src_vector(p, &inst->Src[0], fs); 495 src1 = src_vector(p, &inst->Src[1], fs); 496 src2 = src_vector(p, &inst->Src[2], fs); 497 i915_emit_arith(p, A0_CMP, get_result_vector(p, &inst->Dst[0]), 498 get_result_flags(inst), 0, src0, src2, 499 src1); /* NOTE: order of src2, src1 */ 500 break; 501 502 case TGSI_OPCODE_DDX: 503 case TGSI_OPCODE_DDY: 504 /* XXX We just output 0 here */ 505 debug_printf("Punting DDX/DDY\n"); 506 src0 = get_result_vector(p, &inst->Dst[0]); 507 i915_emit_arith(p, A0_MOV, get_result_vector(p, &inst->Dst[0]), 508 get_result_flags(inst), 0, 509 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0, 0); 510 break; 511 512 case TGSI_OPCODE_DP2: 513 src0 = src_vector(p, &inst->Src[0], fs); 514 src1 = src_vector(p, &inst->Src[1], fs); 515 516 i915_emit_arith(p, A0_DP3, get_result_vector(p, &inst->Dst[0]), 517 get_result_flags(inst), 0, 518 swizzle(src0, X, Y, ZERO, ZERO), src1, 0); 519 break; 520 521 case TGSI_OPCODE_DP3: 522 emit_simple_arith(p, inst, A0_DP3, 2, fs); 523 break; 524 525 case TGSI_OPCODE_DP4: 526 emit_simple_arith(p, inst, A0_DP4, 2, fs); 527 break; 528 529 case TGSI_OPCODE_DST: 530 src0 = src_vector(p, &inst->Src[0], fs); 531 src1 = src_vector(p, &inst->Src[1], fs); 532 533 /* result[0] = 1 * 1; 534 * result[1] = a[1] * b[1]; 535 * result[2] = a[2] * 1; 536 * result[3] = 1 * b[3]; 537 */ 538 i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]), 539 get_result_flags(inst), 0, swizzle(src0, ONE, Y, Z, ONE), 540 swizzle(src1, ONE, Y, ONE, W), 0); 541 break; 542 543 case TGSI_OPCODE_END: 544 /* no-op */ 545 break; 546 547 case TGSI_OPCODE_EX2: 548 src0 = src_vector(p, &inst->Src[0], fs); 549 550 i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), 551 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 552 0); 553 break; 554 555 case TGSI_OPCODE_FLR: 556 emit_simple_arith(p, inst, A0_FLR, 1, fs); 557 break; 558 559 case TGSI_OPCODE_FRC: 560 emit_simple_arith(p, inst, A0_FRC, 1, fs); 561 break; 562 563 case TGSI_OPCODE_KILL_IF: 564 /* kill if src[0].x < 0 || src[0].y < 0 ... */ 565 src0 = src_vector(p, &inst->Src[0], fs); 566 tmp = i915_get_utemp(p); 567 568 i915_emit_texld(p, tmp, /* dest reg: a dummy reg */ 569 A0_DEST_CHANNEL_ALL, /* dest writemask */ 570 0, /* sampler */ 571 src0, /* coord*/ 572 T0_TEXKILL, /* opcode */ 573 TGSI_WRITEMASK_XYZW);/* coord_mask */ 574 break; 575 576 case TGSI_OPCODE_KILL: 577 /* unconditional kill */ 578 tmp = i915_get_utemp(p); 579 580 i915_emit_texld(p, tmp, /* dest reg: a dummy reg */ 581 A0_DEST_CHANNEL_ALL, /* dest writemask */ 582 0, /* sampler */ 583 negate(swizzle(UREG(REG_TYPE_R, 0), ONE, ONE, ONE, ONE), 584 1, 1, 1, 1), /* coord */ 585 T0_TEXKILL, /* opcode */ 586 TGSI_WRITEMASK_X); /* coord_mask */ 587 break; 588 589 case TGSI_OPCODE_LG2: 590 src0 = src_vector(p, &inst->Src[0], fs); 591 592 i915_emit_arith(p, A0_LOG, get_result_vector(p, &inst->Dst[0]), 593 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 594 0); 595 break; 596 597 case TGSI_OPCODE_LIT: 598 src0 = src_vector(p, &inst->Src[0], fs); 599 tmp = i915_get_utemp(p); 600 601 /* tmp = max( a.xyzw, a.00zw ) 602 * XXX: Clamp tmp.w to -128..128 603 * tmp.y = log(tmp.y) 604 * tmp.y = tmp.w * tmp.y 605 * tmp.y = exp(tmp.y) 606 * result = cmp (a.11-x1, a.1x01, a.1xy1 ) 607 */ 608 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0, 609 swizzle(src0, ZERO, ZERO, Z, W), 0); 610 611 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_Y, 0, 612 swizzle(tmp, Y, Y, Y, Y), 0, 0); 613 614 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_Y, 0, 615 swizzle(tmp, ZERO, Y, ZERO, ZERO), 616 swizzle(tmp, ZERO, W, ZERO, ZERO), 0); 617 618 i915_emit_arith(p, A0_EXP, tmp, A0_DEST_CHANNEL_Y, 0, 619 swizzle(tmp, Y, Y, Y, Y), 0, 0); 620 621 i915_emit_arith( 622 p, A0_CMP, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 623 0, negate(swizzle(tmp, ONE, ONE, X, ONE), 0, 0, 1, 0), 624 swizzle(tmp, ONE, X, ZERO, ONE), swizzle(tmp, ONE, X, Y, ONE)); 625 626 break; 627 628 case TGSI_OPCODE_LRP: 629 src0 = src_vector(p, &inst->Src[0], fs); 630 src1 = src_vector(p, &inst->Src[1], fs); 631 src2 = src_vector(p, &inst->Src[2], fs); 632 flags = get_result_flags(inst); 633 tmp = i915_get_utemp(p); 634 635 /* b*a + c*(1-a) 636 * 637 * b*a + c - ca 638 * 639 * tmp = b*a + c, 640 * result = (-c)*a + tmp 641 */ 642 i915_emit_arith(p, A0_MAD, tmp, flags & A0_DEST_CHANNEL_ALL, 0, src1, 643 src0, src2); 644 645 i915_emit_arith(p, A0_MAD, get_result_vector(p, &inst->Dst[0]), flags, 0, 646 negate(src2, 1, 1, 1, 1), src0, tmp); 647 break; 648 649 case TGSI_OPCODE_MAD: 650 emit_simple_arith(p, inst, A0_MAD, 3, fs); 651 break; 652 653 case TGSI_OPCODE_MAX: 654 emit_simple_arith(p, inst, A0_MAX, 2, fs); 655 break; 656 657 case TGSI_OPCODE_MIN: 658 emit_simple_arith(p, inst, A0_MIN, 2, fs); 659 break; 660 661 case TGSI_OPCODE_MOV: 662 emit_simple_arith(p, inst, A0_MOV, 1, fs); 663 break; 664 665 case TGSI_OPCODE_MUL: 666 emit_simple_arith(p, inst, A0_MUL, 2, fs); 667 break; 668 669 case TGSI_OPCODE_NOP: 670 break; 671 672 case TGSI_OPCODE_POW: 673 src0 = src_vector(p, &inst->Src[0], fs); 674 src1 = src_vector(p, &inst->Src[1], fs); 675 tmp = i915_get_utemp(p); 676 flags = get_result_flags(inst); 677 678 /* XXX: masking on intermediate values, here and elsewhere. 679 */ 680 i915_emit_arith(p, A0_LOG, tmp, A0_DEST_CHANNEL_X, 0, 681 swizzle(src0, X, X, X, X), 0, 0); 682 683 i915_emit_arith(p, A0_MUL, tmp, A0_DEST_CHANNEL_X, 0, tmp, src1, 0); 684 685 i915_emit_arith(p, A0_EXP, get_result_vector(p, &inst->Dst[0]), flags, 0, 686 swizzle(tmp, X, X, X, X), 0, 0); 687 break; 688 689 case TGSI_OPCODE_RET: 690 /* XXX: no-op? */ 691 break; 692 693 case TGSI_OPCODE_RCP: 694 src0 = src_vector(p, &inst->Src[0], fs); 695 696 i915_emit_arith(p, A0_RCP, get_result_vector(p, &inst->Dst[0]), 697 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 698 0); 699 break; 700 701 case TGSI_OPCODE_RSQ: 702 src0 = src_vector(p, &inst->Src[0], fs); 703 704 i915_emit_arith(p, A0_RSQ, get_result_vector(p, &inst->Dst[0]), 705 get_result_flags(inst), 0, swizzle(src0, X, X, X, X), 0, 706 0); 707 break; 708 709 case TGSI_OPCODE_SEQ: { 710 const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0), 711 SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO); 712 713 /* if we're both >= and <= then we're == */ 714 src0 = src_vector(p, &inst->Src[0], fs); 715 src1 = src_vector(p, &inst->Src[1], fs); 716 tmp = i915_get_utemp(p); 717 718 if (src0 == zero || src1 == zero) { 719 if (src0 == zero) 720 src0 = src1; 721 722 /* x == 0 is equivalent to -abs(x) >= 0, but the latter requires only 723 * two instructions instead of three. 724 */ 725 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0, 726 negate(src0, 1, 1, 1, 1), 0); 727 i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]), 728 get_result_flags(inst), 0, 729 negate(tmp, 1, 1, 1, 1), zero, 0); 730 } else { 731 i915_emit_arith(p, A0_SGE, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0); 732 733 i915_emit_arith(p, A0_SGE, get_result_vector(p, &inst->Dst[0]), 734 get_result_flags(inst), 0, src1, src0, 0); 735 736 i915_emit_arith(p, A0_MUL, get_result_vector(p, &inst->Dst[0]), 737 get_result_flags(inst), 0, 738 get_result_vector(p, &inst->Dst[0]), tmp, 0); 739 } 740 741 break; 742 } 743 744 case TGSI_OPCODE_SGE: 745 emit_simple_arith(p, inst, A0_SGE, 2, fs); 746 break; 747 748 case TGSI_OPCODE_SLE: 749 /* like SGE, but swap reg0, reg1 */ 750 emit_simple_arith_swap2(p, inst, A0_SGE, 2, fs); 751 break; 752 753 case TGSI_OPCODE_SLT: 754 emit_simple_arith(p, inst, A0_SLT, 2, fs); 755 break; 756 757 case TGSI_OPCODE_SGT: 758 /* like SLT, but swap reg0, reg1 */ 759 emit_simple_arith_swap2(p, inst, A0_SLT, 2, fs); 760 break; 761 762 case TGSI_OPCODE_SNE: { 763 const uint32_t zero = swizzle(UREG(REG_TYPE_R, 0), 764 SRC_ZERO, SRC_ZERO, SRC_ZERO, SRC_ZERO); 765 766 /* if we're < or > then we're != */ 767 src0 = src_vector(p, &inst->Src[0], fs); 768 src1 = src_vector(p, &inst->Src[1], fs); 769 tmp = i915_get_utemp(p); 770 771 if (src0 == zero || src1 == zero) { 772 if (src0 == zero) 773 src0 = src1; 774 775 /* x != 0 is equivalent to -abs(x) < 0, but the latter requires only 776 * two instructions instead of three. 777 */ 778 i915_emit_arith(p, A0_MAX, tmp, A0_DEST_CHANNEL_ALL, 0, src0, 779 negate(src0, 1, 1, 1, 1), 0); 780 i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]), 781 get_result_flags(inst), 0, 782 negate(tmp, 1, 1, 1, 1), zero, 0); 783 } else { 784 i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, src1, 0); 785 786 i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]), 787 get_result_flags(inst), 0, src1, src0, 0); 788 789 i915_emit_arith(p, A0_ADD, get_result_vector(p, &inst->Dst[0]), 790 get_result_flags(inst), 0, 791 get_result_vector(p, &inst->Dst[0]), tmp, 0); 792 } 793 break; 794 } 795 796 case TGSI_OPCODE_SSG: 797 /* compute (src>0) - (src<0) */ 798 src0 = src_vector(p, &inst->Src[0], fs); 799 tmp = i915_get_utemp(p); 800 801 i915_emit_arith(p, A0_SLT, tmp, A0_DEST_CHANNEL_ALL, 0, src0, 802 swizzle(src0, ZERO, ZERO, ZERO, ZERO), 0); 803 804 i915_emit_arith(p, A0_SLT, get_result_vector(p, &inst->Dst[0]), 805 get_result_flags(inst), 0, 806 swizzle(src0, ZERO, ZERO, ZERO, ZERO), src0, 0); 807 808 i915_emit_arith( 809 p, A0_ADD, get_result_vector(p, &inst->Dst[0]), get_result_flags(inst), 810 0, get_result_vector(p, &inst->Dst[0]), negate(tmp, 1, 1, 1, 1), 0); 811 break; 812 813 case TGSI_OPCODE_TEX: 814 emit_tex(p, inst, T0_TEXLD, fs); 815 break; 816 817 case TGSI_OPCODE_TRUNC: 818 emit_simple_arith(p, inst, A0_TRC, 1, fs); 819 break; 820 821 case TGSI_OPCODE_TXB: 822 emit_tex(p, inst, T0_TEXLDB, fs); 823 break; 824 825 case TGSI_OPCODE_TXP: 826 emit_tex(p, inst, T0_TEXLDP, fs); 827 break; 828 829 default: 830 i915_program_error(p, "bad opcode %s (%d)", 831 tgsi_get_opcode_name(inst->Instruction.Opcode), 832 inst->Instruction.Opcode); 833 return; 834 } 835 836 i915_release_utemps(p); 837} 838 839static void 840i915_translate_token(struct i915_fp_compile *p, 841 const union i915_full_token *token, 842 struct i915_fragment_shader *fs) 843{ 844 struct i915_fragment_shader *ifs = p->shader; 845 switch (token->Token.Type) { 846 case TGSI_TOKEN_TYPE_PROPERTY: 847 /* Ignore properties where we only support one value. */ 848 assert(token->FullProperty.Property.PropertyName == 849 TGSI_PROPERTY_FS_COORD_ORIGIN || 850 token->FullProperty.Property.PropertyName == 851 TGSI_PROPERTY_FS_COORD_PIXEL_CENTER || 852 token->FullProperty.Property.PropertyName == 853 TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS || 854 token->FullProperty.Property.PropertyName == 855 TGSI_PROPERTY_SEPARABLE_PROGRAM); 856 break; 857 858 case TGSI_TOKEN_TYPE_DECLARATION: 859 if (token->FullDeclaration.Declaration.File == TGSI_FILE_CONSTANT) { 860 if (token->FullDeclaration.Range.Last >= I915_MAX_CONSTANT) { 861 i915_program_error(p, "Exceeded %d max uniforms", 862 I915_MAX_CONSTANT); 863 } else { 864 uint32_t i; 865 for (i = token->FullDeclaration.Range.First; 866 i <= token->FullDeclaration.Range.Last; i++) { 867 ifs->constant_flags[i] = I915_CONSTFLAG_USER; 868 ifs->num_constants = MAX2(ifs->num_constants, i + 1); 869 } 870 } 871 } else if (token->FullDeclaration.Declaration.File == 872 TGSI_FILE_TEMPORARY) { 873 if (token->FullDeclaration.Range.Last >= I915_MAX_TEMPORARY) { 874 i915_program_error(p, "Exceeded %d max TGSI temps", 875 I915_MAX_TEMPORARY); 876 } else { 877 uint32_t i; 878 for (i = token->FullDeclaration.Range.First; 879 i <= token->FullDeclaration.Range.Last; i++) { 880 /* XXX just use shader->info->file_mask[TGSI_FILE_TEMPORARY] */ 881 p->temp_flag |= (1 << i); /* mark temp as used */ 882 } 883 } 884 } 885 break; 886 887 case TGSI_TOKEN_TYPE_IMMEDIATE: { 888 const struct tgsi_full_immediate *imm = &token->FullImmediate; 889 const uint32_t pos = p->num_immediates++; 890 uint32_t j; 891 assert(imm->Immediate.NrTokens <= 4 + 1); 892 for (j = 0; j < imm->Immediate.NrTokens - 1; j++) { 893 p->immediates[pos][j] = imm->u[j].Float; 894 } 895 } break; 896 897 case TGSI_TOKEN_TYPE_INSTRUCTION: 898 if (p->first_instruction) { 899 /* resolve location of immediates */ 900 uint32_t i, j; 901 for (i = 0; i < p->num_immediates; i++) { 902 /* find constant slot for this immediate */ 903 for (j = 0; j < I915_MAX_CONSTANT; j++) { 904 if (ifs->constant_flags[j] == 0x0) { 905 memcpy(ifs->constants[j], p->immediates[i], 906 4 * sizeof(float)); 907 /*printf("immediate %d maps to const %d\n", i, j);*/ 908 ifs->constant_flags[j] = 0xf; /* all four comps used */ 909 p->immediates_map[i] = j; 910 ifs->num_constants = MAX2(ifs->num_constants, j + 1); 911 break; 912 } 913 } 914 if (j == I915_MAX_CONSTANT) { 915 i915_program_error(p, "Exceeded %d max uniforms and immediates.", 916 I915_MAX_CONSTANT); 917 } 918 } 919 920 p->first_instruction = false; 921 } 922 923 i915_translate_instruction(p, &token->FullInstruction, fs); 924 break; 925 926 default: 927 assert(0); 928 } 929} 930 931/** 932 * Translate TGSI fragment shader into i915 hardware instructions. 933 * \param p the translation state 934 * \param tokens the TGSI token array 935 */ 936static void 937i915_translate_instructions(struct i915_fp_compile *p, 938 const struct i915_token_list *tokens, 939 struct i915_fragment_shader *fs) 940{ 941 int i; 942 for (i = 0; i < tokens->NumTokens && !p->error; i++) { 943 i915_translate_token(p, &tokens->Tokens[i], fs); 944 } 945} 946 947static struct i915_fp_compile * 948i915_init_compile(struct i915_context *i915, struct i915_fragment_shader *ifs) 949{ 950 struct i915_fp_compile *p = CALLOC_STRUCT(i915_fp_compile); 951 int i; 952 953 p->shader = ifs; 954 955 /* Put new constants at end of const buffer, growing downward. 956 * The problem is we don't know how many user-defined constants might 957 * be specified with pipe->set_constant_buffer(). 958 * Should pre-scan the user's program to determine the highest-numbered 959 * constant referenced. 960 */ 961 ifs->num_constants = 0; 962 memset(ifs->constant_flags, 0, sizeof(ifs->constant_flags)); 963 964 memset(&p->register_phases, 0, sizeof(p->register_phases)); 965 966 for (i = 0; i < I915_TEX_UNITS; i++) 967 ifs->texcoords[i].semantic = -1; 968 969 p->log_program_errors = !i915->no_log_program_errors; 970 971 p->first_instruction = true; 972 973 p->nr_tex_indirect = 1; /* correct? */ 974 p->nr_tex_insn = 0; 975 p->nr_alu_insn = 0; 976 p->nr_decl_insn = 0; 977 978 p->csr = p->program; 979 p->decl = p->declarations; 980 p->decl_s = 0; 981 p->decl_t = 0; 982 p->temp_flag = ~0x0U << I915_MAX_TEMPORARY; 983 p->utemp_flag = ~0x7; 984 985 /* initialize the first program word */ 986 *(p->decl++) = _3DSTATE_PIXEL_SHADER_PROGRAM; 987 988 return p; 989} 990 991/* Copy compile results to the fragment program struct and destroy the 992 * compilation context. 993 */ 994static void 995i915_fini_compile(struct i915_context *i915, struct i915_fp_compile *p) 996{ 997 struct i915_fragment_shader *ifs = p->shader; 998 unsigned long program_size = (unsigned long)(p->csr - p->program); 999 unsigned long decl_size = (unsigned long)(p->decl - p->declarations); 1000 1001 if (p->nr_tex_indirect > I915_MAX_TEX_INDIRECT) 1002 debug_printf("Exceeded max nr indirect texture lookups\n"); 1003 1004 if (p->nr_tex_insn > I915_MAX_TEX_INSN) 1005 i915_program_error(p, "Exceeded max TEX instructions"); 1006 1007 if (p->nr_alu_insn > I915_MAX_ALU_INSN) 1008 i915_program_error(p, "Exceeded max ALU instructions"); 1009 1010 if (p->nr_decl_insn > I915_MAX_DECL_INSN) 1011 i915_program_error(p, "Exceeded max DECL instructions"); 1012 1013 /* hw doesn't seem to like empty frag programs (num_instructions == 1 is just 1014 * TGSI_END), even when the depth write fixup gets emitted below - maybe that 1015 * one is fishy, too? 1016 */ 1017 if (ifs->info.num_instructions == 1) 1018 i915_program_error(p, "Empty fragment shader"); 1019 1020 if (p->error) { 1021 p->NumNativeInstructions = 0; 1022 p->NumNativeAluInstructions = 0; 1023 p->NumNativeTexInstructions = 0; 1024 p->NumNativeTexIndirections = 0; 1025 1026 i915_use_passthrough_shader(ifs); 1027 } else { 1028 p->NumNativeInstructions = 1029 p->nr_alu_insn + p->nr_tex_insn + p->nr_decl_insn; 1030 p->NumNativeAluInstructions = p->nr_alu_insn; 1031 p->NumNativeTexInstructions = p->nr_tex_insn; 1032 p->NumNativeTexIndirections = p->nr_tex_indirect; 1033 1034 /* patch in the program length */ 1035 p->declarations[0] |= program_size + decl_size - 2; 1036 1037 /* Copy compilation results to fragment program struct: 1038 */ 1039 assert(!ifs->program); 1040 1041 ifs->program_len = decl_size + program_size; 1042 ifs->program = (uint32_t *)MALLOC(ifs->program_len * sizeof(uint32_t)); 1043 memcpy(ifs->program, p->declarations, decl_size * sizeof(uint32_t)); 1044 memcpy(&ifs->program[decl_size], p->program, 1045 program_size * sizeof(uint32_t)); 1046 1047 util_debug_message( 1048 &i915->debug, SHADER_INFO, 1049 "%s shader: %d inst, %d tex, %d tex_indirect, %d temps, %d const", 1050 _mesa_shader_stage_to_abbrev(MESA_SHADER_FRAGMENT), (int)program_size, 1051 p->nr_tex_insn, p->nr_tex_indirect, 1052 p->shader->info.file_max[TGSI_FILE_TEMPORARY] + 1, 1053 ifs->num_constants); 1054 } 1055 1056 /* Release the compilation struct: 1057 */ 1058 FREE(p); 1059} 1060 1061/** 1062 * Rather than trying to intercept and jiggle depth writes during 1063 * emit, just move the value into its correct position at the end of 1064 * the program: 1065 */ 1066static void 1067i915_fixup_depth_write(struct i915_fp_compile *p) 1068{ 1069 for (int i = 0; i < p->shader->info.num_outputs; i++) { 1070 if (p->shader->info.output_semantic_name[i] != TGSI_SEMANTIC_POSITION) 1071 continue; 1072 1073 const uint32_t depth = UREG(REG_TYPE_OD, 0); 1074 1075 i915_emit_arith(p, A0_MOV, /* opcode */ 1076 depth, /* dest reg */ 1077 A0_DEST_CHANNEL_W, /* write mask */ 1078 0, /* saturate? */ 1079 swizzle(depth, X, Y, Z, Z), /* src0 */ 1080 0, 0 /* src1, src2 */); 1081 } 1082} 1083 1084void 1085i915_translate_fragment_program(struct i915_context *i915, 1086 struct i915_fragment_shader *fs) 1087{ 1088 struct i915_fp_compile *p; 1089 const struct tgsi_token *tokens = fs->state.tokens; 1090 struct i915_token_list *i_tokens; 1091 1092 if (I915_DBG_ON(DBG_FS)) { 1093 mesa_logi("TGSI fragment shader:"); 1094 tgsi_dump(tokens, 0); 1095 } 1096 1097 p = i915_init_compile(i915, fs); 1098 1099 i_tokens = i915_optimize(tokens); 1100 i915_translate_instructions(p, i_tokens, fs); 1101 i915_fixup_depth_write(p); 1102 1103 i915_fini_compile(i915, p); 1104 i915_optimize_free(i_tokens); 1105 1106 if (I915_DBG_ON(DBG_FS)) { 1107 mesa_logi("i915 fragment shader with %d constants%s", fs->num_constants, 1108 fs->num_constants ? ":" : ""); 1109 1110 for (int i = 0; i < I915_MAX_CONSTANT; i++) { 1111 if (fs->constant_flags[i] && 1112 fs->constant_flags[i] != I915_CONSTFLAG_USER) { 1113 mesa_logi("\t\tC[%d] = { %f, %f, %f, %f }", i, fs->constants[i][0], 1114 fs->constants[i][1], fs->constants[i][2], 1115 fs->constants[i][3]); 1116 } 1117 } 1118 i915_disassemble_program(fs->program, fs->program_len); 1119 } 1120} 1121