1/************************************************************************** 2 * 3 * Copyright 2011 The Chromium OS authors. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28#include "i915_context.h" 29#include "i915_fpc.h" 30#include "i915_reg.h" 31 32#include "pipe/p_shader_tokens.h" 33#include "tgsi/tgsi_dump.h" 34#include "tgsi/tgsi_exec.h" 35#include "tgsi/tgsi_parse.h" 36#include "util/u_math.h" 37#include "util/u_memory.h" 38#include "util/u_string.h" 39 40struct i915_optimize_context { 41 int first_write[TGSI_EXEC_NUM_TEMPS]; 42 int last_read[TGSI_EXEC_NUM_TEMPS]; 43}; 44 45static bool 46same_src_dst_reg(struct i915_full_src_register *s1, 47 struct i915_full_dst_register *d1) 48{ 49 return (s1->Register.File == d1->Register.File && 50 s1->Register.Indirect == d1->Register.Indirect && 51 s1->Register.Dimension == d1->Register.Dimension && 52 s1->Register.Index == d1->Register.Index); 53} 54 55static bool 56same_dst_reg(struct i915_full_dst_register *d1, 57 struct i915_full_dst_register *d2) 58{ 59 return (d1->Register.File == d2->Register.File && 60 d1->Register.Indirect == d2->Register.Indirect && 61 d1->Register.Dimension == d2->Register.Dimension && 62 d1->Register.Index == d2->Register.Index); 63} 64 65static bool 66same_src_reg(struct i915_full_src_register *d1, 67 struct i915_full_src_register *d2) 68{ 69 return (d1->Register.File == d2->Register.File && 70 d1->Register.Indirect == d2->Register.Indirect && 71 d1->Register.Dimension == d2->Register.Dimension && 72 d1->Register.Index == d2->Register.Index && 73 d1->Register.Absolute == d2->Register.Absolute && 74 d1->Register.Negate == d2->Register.Negate); 75} 76 77static const struct { 78 bool is_texture; 79 bool commutes; 80 unsigned neutral_element; 81 unsigned num_dst; 82 unsigned num_src; 83} op_table[TGSI_OPCODE_LAST] = { 84 [TGSI_OPCODE_ADD] = {false, true, TGSI_SWIZZLE_ZERO, 1, 2}, 85 [TGSI_OPCODE_CEIL] = {false, false, 0, 1, 1}, 86 [TGSI_OPCODE_CMP] = {false, false, 0, 1, 2}, 87 [TGSI_OPCODE_COS] = {false, false, 0, 1, 1}, 88 [TGSI_OPCODE_DDX] = {false, false, 0, 1, 0}, 89 [TGSI_OPCODE_DDY] = {false, false, 0, 1, 0}, 90 [TGSI_OPCODE_DP2] = {false, true, TGSI_SWIZZLE_ONE, 1, 2}, 91 [TGSI_OPCODE_DP3] = {false, true, TGSI_SWIZZLE_ONE, 1, 2}, 92 [TGSI_OPCODE_DP4] = {false, true, TGSI_SWIZZLE_ONE, 1, 2}, 93 [TGSI_OPCODE_DST] = {false, false, 0, 1, 2}, 94 [TGSI_OPCODE_END] = {false, false, 0, 0, 0}, 95 [TGSI_OPCODE_EX2] = {false, false, 0, 1, 1}, 96 [TGSI_OPCODE_FLR] = {false, false, 0, 1, 1}, 97 [TGSI_OPCODE_FRC] = {false, false, 0, 1, 1}, 98 [TGSI_OPCODE_KILL_IF] = {false, false, 0, 0, 1}, 99 [TGSI_OPCODE_KILL] = {false, false, 0, 0, 0}, 100 [TGSI_OPCODE_LG2] = {false, false, 0, 1, 1}, 101 [TGSI_OPCODE_LIT] = {false, false, 0, 1, 1}, 102 [TGSI_OPCODE_LRP] = {false, false, 0, 1, 3}, 103 [TGSI_OPCODE_MAX] = {false, false, 0, 1, 2}, 104 [TGSI_OPCODE_MAD] = {false, false, 0, 1, 3}, 105 [TGSI_OPCODE_MIN] = {false, false, 0, 1, 2}, 106 [TGSI_OPCODE_MOV] = {false, false, 0, 1, 1}, 107 [TGSI_OPCODE_MUL] = {false, true, TGSI_SWIZZLE_ONE, 1, 2}, 108 [TGSI_OPCODE_NOP] = {false, false, 0, 0, 0}, 109 [TGSI_OPCODE_POW] = {false, false, 0, 1, 2}, 110 [TGSI_OPCODE_RCP] = {false, false, 0, 1, 1}, 111 [TGSI_OPCODE_RET] = {false, false, 0, 0, 0}, 112 [TGSI_OPCODE_RSQ] = {false, false, 0, 1, 1}, 113 [TGSI_OPCODE_SEQ] = {false, false, 0, 1, 2}, 114 [TGSI_OPCODE_SGE] = {false, false, 0, 1, 2}, 115 [TGSI_OPCODE_SGT] = {false, false, 0, 1, 2}, 116 [TGSI_OPCODE_SIN] = {false, false, 0, 1, 1}, 117 [TGSI_OPCODE_SLE] = {false, false, 0, 1, 2}, 118 [TGSI_OPCODE_SLT] = {false, false, 0, 1, 2}, 119 [TGSI_OPCODE_SNE] = {false, false, 0, 1, 2}, 120 [TGSI_OPCODE_SSG] = {false, false, 0, 1, 1}, 121 [TGSI_OPCODE_TEX] = {true, false, 0, 1, 2}, 122 [TGSI_OPCODE_TRUNC] = {false, false, 0, 1, 1}, 123 [TGSI_OPCODE_TXB] = {true, false, 0, 1, 2}, 124 [TGSI_OPCODE_TXP] = {true, false, 0, 1, 2}, 125}; 126 127static bool 128op_has_dst(unsigned opcode) 129{ 130 return (op_table[opcode].num_dst > 0); 131} 132 133static int 134op_num_dst(unsigned opcode) 135{ 136 return op_table[opcode].num_dst; 137} 138 139static int 140op_num_src(unsigned opcode) 141{ 142 return op_table[opcode].num_src; 143} 144 145static bool 146op_commutes(unsigned opcode) 147{ 148 return op_table[opcode].commutes; 149} 150 151static bool 152is_unswizzled(struct i915_full_src_register *r, unsigned write_mask) 153{ 154 if (write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X) 155 return false; 156 if (write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y) 157 return false; 158 if (write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z) 159 return false; 160 if (write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W) 161 return false; 162 return true; 163} 164 165static bool 166op_is_texture(unsigned opcode) 167{ 168 return op_table[opcode].is_texture; 169} 170 171static unsigned 172op_neutral_element(unsigned opcode) 173{ 174 unsigned ne = op_table[opcode].neutral_element; 175 if (!ne) { 176 debug_printf("No neutral element for opcode %d\n", opcode); 177 ne = TGSI_SWIZZLE_ZERO; 178 } 179 return ne; 180} 181 182/* 183 * Sets the swizzle to the neutral element for the operation for the bits 184 * of writemask which are set, swizzle to identity otherwise. 185 */ 186static void 187set_neutral_element_swizzle(struct i915_full_src_register *r, 188 unsigned write_mask, unsigned neutral) 189{ 190 if (write_mask & TGSI_WRITEMASK_X) 191 r->Register.SwizzleX = neutral; 192 else 193 r->Register.SwizzleX = TGSI_SWIZZLE_X; 194 195 if (write_mask & TGSI_WRITEMASK_Y) 196 r->Register.SwizzleY = neutral; 197 else 198 r->Register.SwizzleY = TGSI_SWIZZLE_Y; 199 200 if (write_mask & TGSI_WRITEMASK_Z) 201 r->Register.SwizzleZ = neutral; 202 else 203 r->Register.SwizzleZ = TGSI_SWIZZLE_Z; 204 205 if (write_mask & TGSI_WRITEMASK_W) 206 r->Register.SwizzleW = neutral; 207 else 208 r->Register.SwizzleW = TGSI_SWIZZLE_W; 209} 210 211static void 212copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i) 213{ 214 o->File = i->File; 215 o->Indirect = i->Indirect; 216 o->Dimension = i->Dimension; 217 o->Index = i->Index; 218 o->SwizzleX = i->SwizzleX; 219 o->SwizzleY = i->SwizzleY; 220 o->SwizzleZ = i->SwizzleZ; 221 o->SwizzleW = i->SwizzleW; 222 o->Absolute = i->Absolute; 223 o->Negate = i->Negate; 224} 225 226static void 227copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i) 228{ 229 o->File = i->File; 230 o->WriteMask = i->WriteMask; 231 o->Indirect = i->Indirect; 232 o->Dimension = i->Dimension; 233 o->Index = i->Index; 234} 235 236static void 237copy_instruction(struct i915_full_instruction *o, 238 const struct tgsi_full_instruction *i) 239{ 240 memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction)); 241 memcpy(&o->Texture, &i->Texture, sizeof(o->Texture)); 242 243 copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register); 244 245 copy_src_reg(&o->Src[0].Register, &i->Src[0].Register); 246 copy_src_reg(&o->Src[1].Register, &i->Src[1].Register); 247 copy_src_reg(&o->Src[2].Register, &i->Src[2].Register); 248} 249 250static void 251copy_token(union i915_full_token *o, union tgsi_full_token *i) 252{ 253 if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 254 memcpy(o, i, sizeof(*o)); 255 else 256 copy_instruction(&o->FullInstruction, &i->FullInstruction); 257} 258 259static void 260liveness_mark_written(struct i915_optimize_context *ctx, 261 struct i915_full_dst_register *dst_reg, int pos) 262{ 263 int dst_reg_index; 264 if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) { 265 dst_reg_index = dst_reg->Register.Index; 266 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); 267 /* dead -> live transition */ 268 if (ctx->first_write[dst_reg_index] != -1) 269 ctx->first_write[dst_reg_index] = pos; 270 } 271} 272 273static void 274liveness_mark_read(struct i915_optimize_context *ctx, 275 struct i915_full_src_register *src_reg, int pos) 276{ 277 int src_reg_index; 278 if (src_reg->Register.File == TGSI_FILE_TEMPORARY) { 279 src_reg_index = src_reg->Register.Index; 280 assert(src_reg_index < TGSI_EXEC_NUM_TEMPS); 281 /* live -> dead transition */ 282 if (ctx->last_read[src_reg_index] != -1) 283 ctx->last_read[src_reg_index] = pos; 284 } 285} 286 287static void 288liveness_analysis(struct i915_optimize_context *ctx, 289 struct i915_token_list *tokens) 290{ 291 struct i915_full_dst_register *dst_reg; 292 struct i915_full_src_register *src_reg; 293 union i915_full_token *current; 294 unsigned opcode; 295 int num_dst, num_src; 296 int i = 0; 297 298 for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) { 299 ctx->first_write[i] = -1; 300 ctx->last_read[i] = -1; 301 } 302 303 for (i = 0; i < tokens->NumTokens; i++) { 304 current = &tokens->Tokens[i]; 305 306 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 307 continue; 308 309 opcode = current->FullInstruction.Instruction.Opcode; 310 num_dst = op_num_dst(opcode); 311 312 switch (num_dst) { 313 case 1: 314 dst_reg = ¤t->FullInstruction.Dst[0]; 315 liveness_mark_written(ctx, dst_reg, i); 316 FALLTHROUGH; 317 case 0: 318 break; 319 default: 320 debug_printf("Op %d has %d dst regs\n", opcode, num_dst); 321 break; 322 } 323 } 324 325 for (i = tokens->NumTokens - 1; i >= 0; i--) { 326 current = &tokens->Tokens[i]; 327 328 if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) 329 continue; 330 331 opcode = current->FullInstruction.Instruction.Opcode; 332 num_src = op_num_src(opcode); 333 334 switch (num_src) { 335 case 3: 336 src_reg = ¤t->FullInstruction.Src[2]; 337 liveness_mark_read(ctx, src_reg, i); 338 FALLTHROUGH; 339 case 2: 340 src_reg = ¤t->FullInstruction.Src[1]; 341 liveness_mark_read(ctx, src_reg, i); 342 FALLTHROUGH; 343 case 1: 344 src_reg = ¤t->FullInstruction.Src[0]; 345 liveness_mark_read(ctx, src_reg, i); 346 FALLTHROUGH; 347 case 0: 348 break; 349 default: 350 debug_printf("Op %d has %d src regs\n", opcode, num_src); 351 break; 352 } 353 } 354} 355 356static int 357unused_from(struct i915_optimize_context *ctx, 358 struct i915_full_dst_register *dst_reg, int from) 359{ 360 int dst_reg_index = dst_reg->Register.Index; 361 assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS); 362 return (from >= ctx->last_read[dst_reg_index]); 363} 364 365/* Returns a mask with the components used for a texture access instruction */ 366static unsigned 367i915_tex_mask(union i915_full_token *instr) 368{ 369 return i915_coord_mask(instr->FullInstruction.Instruction.Opcode, 370 instr->FullInstruction.Texture.Texture); 371} 372 373static bool 374target_is_texture2d(uint32_t tex) 375{ 376 switch (tex) { 377 case TGSI_TEXTURE_2D: 378 case TGSI_TEXTURE_RECT: 379 return true; 380 default: 381 return false; 382 } 383} 384 385/* 386 * Optimize away useless indirect texture reads: 387 * MOV TEMP[0].xy, IN[0].xyyy 388 * TEX TEMP[1], TEMP[0], SAMP[0], 2D 389 * into: 390 * TEX TEMP[1], IN[0], SAMP[0], 2D 391 * 392 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/.. 393 */ 394static void 395i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx, 396 struct i915_token_list *tokens, int index) 397{ 398 union i915_full_token *current = &tokens->Tokens[index - 1]; 399 union i915_full_token *next = &tokens->Tokens[index]; 400 401 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 402 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 403 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 404 op_is_texture(next->FullInstruction.Instruction.Opcode) && 405 target_is_texture2d(next->FullInstruction.Texture.Texture) && 406 same_src_dst_reg(&next->FullInstruction.Src[0], 407 ¤t->FullInstruction.Dst[0]) && 408 is_unswizzled(¤t->FullInstruction.Src[0], i915_tex_mask(next)) && 409 unused_from(ctx, ¤t->FullInstruction.Dst[0], index)) { 410 memcpy(&next->FullInstruction.Src[0], ¤t->FullInstruction.Src[0], 411 sizeof(struct i915_src_register)); 412 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 413 } 414} 415 416/* 417 * Optimize away things like: 418 * MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0]) 419 * MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0]) 420 * into: 421 * NOP 422 * MOV OUT[0].xyw, TEMP[1].xyww 423 */ 424static void 425i915_fpc_optimize_mov_after_mov(union i915_full_token *current, 426 union i915_full_token *next) 427{ 428 struct i915_full_src_register *src_reg1, *src_reg2; 429 struct i915_full_dst_register *dst_reg1, *dst_reg2; 430 unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w; 431 432 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 433 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 434 current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 435 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 436 current->FullInstruction.Instruction.Saturate == 437 next->FullInstruction.Instruction.Saturate && 438 same_dst_reg(&next->FullInstruction.Dst[0], 439 ¤t->FullInstruction.Dst[0]) && 440 same_src_reg(&next->FullInstruction.Src[0], 441 ¤t->FullInstruction.Src[0]) && 442 !same_src_dst_reg(¤t->FullInstruction.Src[0], 443 ¤t->FullInstruction.Dst[0])) { 444 src_reg1 = ¤t->FullInstruction.Src[0]; 445 dst_reg1 = ¤t->FullInstruction.Dst[0]; 446 src_reg2 = &next->FullInstruction.Src[0]; 447 dst_reg2 = &next->FullInstruction.Dst[0]; 448 449 /* Start with swizzles from the first mov */ 450 swizzle_x = src_reg1->Register.SwizzleX; 451 swizzle_y = src_reg1->Register.SwizzleY; 452 swizzle_z = src_reg1->Register.SwizzleZ; 453 swizzle_w = src_reg1->Register.SwizzleW; 454 455 /* Pile the second mov on top */ 456 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X) 457 swizzle_x = src_reg2->Register.SwizzleX; 458 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y) 459 swizzle_y = src_reg2->Register.SwizzleY; 460 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z) 461 swizzle_z = src_reg2->Register.SwizzleZ; 462 if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W) 463 swizzle_w = src_reg2->Register.SwizzleW; 464 465 dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask; 466 src_reg2->Register.SwizzleX = swizzle_x; 467 src_reg2->Register.SwizzleY = swizzle_y; 468 src_reg2->Register.SwizzleZ = swizzle_z; 469 src_reg2->Register.SwizzleW = swizzle_w; 470 471 current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 472 473 return; 474 } 475} 476 477/* 478 * Optimize away things like: 479 * MUL OUT[0].xyz, TEMP[1], TEMP[2] 480 * MOV OUT[0].w, TEMP[2] 481 * into: 482 * MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2] 483 * This is useful for optimizing texenv. 484 */ 485static void 486i915_fpc_optimize_mov_after_alu(union i915_full_token *current, 487 union i915_full_token *next) 488{ 489 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 490 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 491 op_commutes(current->FullInstruction.Instruction.Opcode) && 492 current->FullInstruction.Instruction.Saturate == 493 next->FullInstruction.Instruction.Saturate && 494 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 495 same_dst_reg(&next->FullInstruction.Dst[0], 496 ¤t->FullInstruction.Dst[0]) && 497 same_src_reg(&next->FullInstruction.Src[0], 498 ¤t->FullInstruction.Src[1]) && 499 !same_src_dst_reg(&next->FullInstruction.Src[0], 500 ¤t->FullInstruction.Dst[0]) && 501 is_unswizzled(¤t->FullInstruction.Src[0], 502 current->FullInstruction.Dst[0].Register.WriteMask) && 503 is_unswizzled(¤t->FullInstruction.Src[1], 504 current->FullInstruction.Dst[0].Register.WriteMask) && 505 is_unswizzled(&next->FullInstruction.Src[0], 506 next->FullInstruction.Dst[0].Register.WriteMask)) { 507 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 508 509 set_neutral_element_swizzle(¤t->FullInstruction.Src[1], 0, 0); 510 set_neutral_element_swizzle( 511 ¤t->FullInstruction.Src[0], 512 next->FullInstruction.Dst[0].Register.WriteMask, 513 op_neutral_element(current->FullInstruction.Instruction.Opcode)); 514 515 current->FullInstruction.Dst[0].Register.WriteMask = 516 current->FullInstruction.Dst[0].Register.WriteMask | 517 next->FullInstruction.Dst[0].Register.WriteMask; 518 return; 519 } 520 521 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 522 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 523 op_commutes(current->FullInstruction.Instruction.Opcode) && 524 current->FullInstruction.Instruction.Saturate == 525 next->FullInstruction.Instruction.Saturate && 526 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 527 same_dst_reg(&next->FullInstruction.Dst[0], 528 ¤t->FullInstruction.Dst[0]) && 529 same_src_reg(&next->FullInstruction.Src[0], 530 ¤t->FullInstruction.Src[0]) && 531 !same_src_dst_reg(&next->FullInstruction.Src[0], 532 ¤t->FullInstruction.Dst[0]) && 533 is_unswizzled(¤t->FullInstruction.Src[0], 534 current->FullInstruction.Dst[0].Register.WriteMask) && 535 is_unswizzled(¤t->FullInstruction.Src[1], 536 current->FullInstruction.Dst[0].Register.WriteMask) && 537 is_unswizzled(&next->FullInstruction.Src[0], 538 next->FullInstruction.Dst[0].Register.WriteMask)) { 539 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 540 541 set_neutral_element_swizzle(¤t->FullInstruction.Src[0], 0, 0); 542 set_neutral_element_swizzle( 543 ¤t->FullInstruction.Src[1], 544 next->FullInstruction.Dst[0].Register.WriteMask, 545 op_neutral_element(current->FullInstruction.Instruction.Opcode)); 546 547 current->FullInstruction.Dst[0].Register.WriteMask = 548 current->FullInstruction.Dst[0].Register.WriteMask | 549 next->FullInstruction.Dst[0].Register.WriteMask; 550 return; 551 } 552} 553 554/* 555 * Optimize away things like: 556 * MOV TEMP[0].xyz TEMP[0].xyzx 557 * into: 558 * NOP 559 */ 560static bool 561i915_fpc_useless_mov(union tgsi_full_token *tgsi_current) 562{ 563 union i915_full_token current; 564 copy_token(¤t, tgsi_current); 565 if (current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 566 current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 567 op_has_dst(current.FullInstruction.Instruction.Opcode) && 568 !current.FullInstruction.Instruction.Saturate && 569 current.FullInstruction.Src[0].Register.Absolute == 0 && 570 current.FullInstruction.Src[0].Register.Negate == 0 && 571 is_unswizzled(¤t.FullInstruction.Src[0], 572 current.FullInstruction.Dst[0].Register.WriteMask) && 573 same_src_dst_reg(¤t.FullInstruction.Src[0], 574 ¤t.FullInstruction.Dst[0])) { 575 return true; 576 } 577 return false; 578} 579 580/* 581 * Optimize away things like: 582 * *** TEMP[0], TEMP[1], TEMP[2] 583 * MOV OUT[0] TEMP[0] 584 * into: 585 * *** OUT[0], TEMP[1], TEMP[2] 586 */ 587static void 588i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx, 589 struct i915_token_list *tokens, 590 int index) 591{ 592 union i915_full_token *current = &tokens->Tokens[index - 1]; 593 union i915_full_token *next = &tokens->Tokens[index]; 594 595 // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]); 596 if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 597 next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION && 598 next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV && 599 op_has_dst(current->FullInstruction.Instruction.Opcode) && 600 !next->FullInstruction.Instruction.Saturate && 601 next->FullInstruction.Src[0].Register.Absolute == 0 && 602 next->FullInstruction.Src[0].Register.Negate == 0 && 603 unused_from(ctx, ¤t->FullInstruction.Dst[0], index) && 604 current->FullInstruction.Dst[0].Register.WriteMask == 605 TGSI_WRITEMASK_XYZW && 606 is_unswizzled(&next->FullInstruction.Src[0], 607 next->FullInstruction.Dst[0].Register.WriteMask) && 608 current->FullInstruction.Dst[0].Register.WriteMask == 609 next->FullInstruction.Dst[0].Register.WriteMask && 610 same_src_dst_reg(&next->FullInstruction.Src[0], 611 ¤t->FullInstruction.Dst[0])) { 612 next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP; 613 614 current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0]; 615 return; 616 } 617} 618 619struct i915_token_list * 620i915_optimize(const struct tgsi_token *tokens) 621{ 622 struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list)); 623 struct tgsi_parse_context parse; 624 struct i915_optimize_context *ctx; 625 int i = 0; 626 627 ctx = malloc(sizeof(*ctx)); 628 629 out_tokens->NumTokens = 0; 630 631 /* Count the tokens */ 632 tgsi_parse_init(&parse, tokens); 633 while (!tgsi_parse_end_of_tokens(&parse)) { 634 tgsi_parse_token(&parse); 635 out_tokens->NumTokens++; 636 } 637 tgsi_parse_free(&parse); 638 639 /* Allocate our tokens */ 640 out_tokens->Tokens = 641 MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens); 642 643 tgsi_parse_init(&parse, tokens); 644 while (!tgsi_parse_end_of_tokens(&parse)) { 645 tgsi_parse_token(&parse); 646 647 if (i915_fpc_useless_mov(&parse.FullToken)) { 648 out_tokens->NumTokens--; 649 continue; 650 } 651 652 copy_token(&out_tokens->Tokens[i], &parse.FullToken); 653 654 i++; 655 } 656 tgsi_parse_free(&parse); 657 658 liveness_analysis(ctx, out_tokens); 659 660 i = 1; 661 while (i < out_tokens->NumTokens) { 662 i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i); 663 i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i - 1], 664 &out_tokens->Tokens[i]); 665 i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i - 1], 666 &out_tokens->Tokens[i]); 667 i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i); 668 i++; 669 } 670 671 free(ctx); 672 673 return out_tokens; 674} 675 676void 677i915_optimize_free(struct i915_token_list *tokens) 678{ 679 free(tokens->Tokens); 680 free(tokens); 681} 682