1/**************************************************************************
2 *
3 * Copyright 2011 The Chromium OS authors.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL GOOGLE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28#include "i915_context.h"
29#include "i915_fpc.h"
30#include "i915_reg.h"
31
32#include "pipe/p_shader_tokens.h"
33#include "tgsi/tgsi_dump.h"
34#include "tgsi/tgsi_exec.h"
35#include "tgsi/tgsi_parse.h"
36#include "util/u_math.h"
37#include "util/u_memory.h"
38#include "util/u_string.h"
39
/*
 * Liveness information for TGSI temporaries, indexed by temporary register
 * index. Stored values are positions in the token list being optimized;
 * liveness_analysis() resets every slot to -1 before filling them in.
 */
struct i915_optimize_context {
   /* Token position recorded by liveness_mark_written(), or -1. */
   int first_write[TGSI_EXEC_NUM_TEMPS];
   /* Token position recorded by liveness_mark_read(), or -1; consulted by
    * unused_from(). */
   int last_read[TGSI_EXEC_NUM_TEMPS];
};
44
45static bool
46same_src_dst_reg(struct i915_full_src_register *s1,
47                 struct i915_full_dst_register *d1)
48{
49   return (s1->Register.File == d1->Register.File &&
50           s1->Register.Indirect == d1->Register.Indirect &&
51           s1->Register.Dimension == d1->Register.Dimension &&
52           s1->Register.Index == d1->Register.Index);
53}
54
55static bool
56same_dst_reg(struct i915_full_dst_register *d1,
57             struct i915_full_dst_register *d2)
58{
59   return (d1->Register.File == d2->Register.File &&
60           d1->Register.Indirect == d2->Register.Indirect &&
61           d1->Register.Dimension == d2->Register.Dimension &&
62           d1->Register.Index == d2->Register.Index);
63}
64
65static bool
66same_src_reg(struct i915_full_src_register *d1,
67             struct i915_full_src_register *d2)
68{
69   return (d1->Register.File == d2->Register.File &&
70           d1->Register.Indirect == d2->Register.Indirect &&
71           d1->Register.Dimension == d2->Register.Dimension &&
72           d1->Register.Index == d2->Register.Index &&
73           d1->Register.Absolute == d2->Register.Absolute &&
74           d1->Register.Negate == d2->Register.Negate);
75}
76
77static const struct {
78   bool is_texture;
79   bool commutes;
80   unsigned neutral_element;
81   unsigned num_dst;
82   unsigned num_src;
83} op_table[TGSI_OPCODE_LAST] = {
84   [TGSI_OPCODE_ADD] = {false, true, TGSI_SWIZZLE_ZERO, 1, 2},
85   [TGSI_OPCODE_CEIL] = {false, false, 0, 1, 1},
86   [TGSI_OPCODE_CMP] = {false, false, 0, 1, 2},
87   [TGSI_OPCODE_COS] = {false, false, 0, 1, 1},
88   [TGSI_OPCODE_DDX] = {false, false, 0, 1, 0},
89   [TGSI_OPCODE_DDY] = {false, false, 0, 1, 0},
90   [TGSI_OPCODE_DP2] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
91   [TGSI_OPCODE_DP3] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
92   [TGSI_OPCODE_DP4] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
93   [TGSI_OPCODE_DST] = {false, false, 0, 1, 2},
94   [TGSI_OPCODE_END] = {false, false, 0, 0, 0},
95   [TGSI_OPCODE_EX2] = {false, false, 0, 1, 1},
96   [TGSI_OPCODE_FLR] = {false, false, 0, 1, 1},
97   [TGSI_OPCODE_FRC] = {false, false, 0, 1, 1},
98   [TGSI_OPCODE_KILL_IF] = {false, false, 0, 0, 1},
99   [TGSI_OPCODE_KILL] = {false, false, 0, 0, 0},
100   [TGSI_OPCODE_LG2] = {false, false, 0, 1, 1},
101   [TGSI_OPCODE_LIT] = {false, false, 0, 1, 1},
102   [TGSI_OPCODE_LRP] = {false, false, 0, 1, 3},
103   [TGSI_OPCODE_MAX] = {false, false, 0, 1, 2},
104   [TGSI_OPCODE_MAD] = {false, false, 0, 1, 3},
105   [TGSI_OPCODE_MIN] = {false, false, 0, 1, 2},
106   [TGSI_OPCODE_MOV] = {false, false, 0, 1, 1},
107   [TGSI_OPCODE_MUL] = {false, true, TGSI_SWIZZLE_ONE, 1, 2},
108   [TGSI_OPCODE_NOP] = {false, false, 0, 0, 0},
109   [TGSI_OPCODE_POW] = {false, false, 0, 1, 2},
110   [TGSI_OPCODE_RCP] = {false, false, 0, 1, 1},
111   [TGSI_OPCODE_RET] = {false, false, 0, 0, 0},
112   [TGSI_OPCODE_RSQ] = {false, false, 0, 1, 1},
113   [TGSI_OPCODE_SEQ] = {false, false, 0, 1, 2},
114   [TGSI_OPCODE_SGE] = {false, false, 0, 1, 2},
115   [TGSI_OPCODE_SGT] = {false, false, 0, 1, 2},
116   [TGSI_OPCODE_SIN] = {false, false, 0, 1, 1},
117   [TGSI_OPCODE_SLE] = {false, false, 0, 1, 2},
118   [TGSI_OPCODE_SLT] = {false, false, 0, 1, 2},
119   [TGSI_OPCODE_SNE] = {false, false, 0, 1, 2},
120   [TGSI_OPCODE_SSG] = {false, false, 0, 1, 1},
121   [TGSI_OPCODE_TEX] = {true, false, 0, 1, 2},
122   [TGSI_OPCODE_TRUNC] = {false, false, 0, 1, 1},
123   [TGSI_OPCODE_TXB] = {true, false, 0, 1, 2},
124   [TGSI_OPCODE_TXP] = {true, false, 0, 1, 2},
125};
126
127static bool
128op_has_dst(unsigned opcode)
129{
130   return (op_table[opcode].num_dst > 0);
131}
132
133static int
134op_num_dst(unsigned opcode)
135{
136   return op_table[opcode].num_dst;
137}
138
139static int
140op_num_src(unsigned opcode)
141{
142   return op_table[opcode].num_src;
143}
144
145static bool
146op_commutes(unsigned opcode)
147{
148   return op_table[opcode].commutes;
149}
150
151static bool
152is_unswizzled(struct i915_full_src_register *r, unsigned write_mask)
153{
154   if (write_mask & TGSI_WRITEMASK_X && r->Register.SwizzleX != TGSI_SWIZZLE_X)
155      return false;
156   if (write_mask & TGSI_WRITEMASK_Y && r->Register.SwizzleY != TGSI_SWIZZLE_Y)
157      return false;
158   if (write_mask & TGSI_WRITEMASK_Z && r->Register.SwizzleZ != TGSI_SWIZZLE_Z)
159      return false;
160   if (write_mask & TGSI_WRITEMASK_W && r->Register.SwizzleW != TGSI_SWIZZLE_W)
161      return false;
162   return true;
163}
164
165static bool
166op_is_texture(unsigned opcode)
167{
168   return op_table[opcode].is_texture;
169}
170
171static unsigned
172op_neutral_element(unsigned opcode)
173{
174   unsigned ne = op_table[opcode].neutral_element;
175   if (!ne) {
176      debug_printf("No neutral element for opcode %d\n", opcode);
177      ne = TGSI_SWIZZLE_ZERO;
178   }
179   return ne;
180}
181
182/*
183 * Sets the swizzle to the neutral element for the operation for the bits
184 * of writemask which are set, swizzle to identity otherwise.
185 */
186static void
187set_neutral_element_swizzle(struct i915_full_src_register *r,
188                            unsigned write_mask, unsigned neutral)
189{
190   if (write_mask & TGSI_WRITEMASK_X)
191      r->Register.SwizzleX = neutral;
192   else
193      r->Register.SwizzleX = TGSI_SWIZZLE_X;
194
195   if (write_mask & TGSI_WRITEMASK_Y)
196      r->Register.SwizzleY = neutral;
197   else
198      r->Register.SwizzleY = TGSI_SWIZZLE_Y;
199
200   if (write_mask & TGSI_WRITEMASK_Z)
201      r->Register.SwizzleZ = neutral;
202   else
203      r->Register.SwizzleZ = TGSI_SWIZZLE_Z;
204
205   if (write_mask & TGSI_WRITEMASK_W)
206      r->Register.SwizzleW = neutral;
207   else
208      r->Register.SwizzleW = TGSI_SWIZZLE_W;
209}
210
211static void
212copy_src_reg(struct i915_src_register *o, const struct tgsi_src_register *i)
213{
214   o->File = i->File;
215   o->Indirect = i->Indirect;
216   o->Dimension = i->Dimension;
217   o->Index = i->Index;
218   o->SwizzleX = i->SwizzleX;
219   o->SwizzleY = i->SwizzleY;
220   o->SwizzleZ = i->SwizzleZ;
221   o->SwizzleW = i->SwizzleW;
222   o->Absolute = i->Absolute;
223   o->Negate = i->Negate;
224}
225
226static void
227copy_dst_reg(struct i915_dst_register *o, const struct tgsi_dst_register *i)
228{
229   o->File = i->File;
230   o->WriteMask = i->WriteMask;
231   o->Indirect = i->Indirect;
232   o->Dimension = i->Dimension;
233   o->Index = i->Index;
234}
235
236static void
237copy_instruction(struct i915_full_instruction *o,
238                 const struct tgsi_full_instruction *i)
239{
240   memcpy(&o->Instruction, &i->Instruction, sizeof(o->Instruction));
241   memcpy(&o->Texture, &i->Texture, sizeof(o->Texture));
242
243   copy_dst_reg(&o->Dst[0].Register, &i->Dst[0].Register);
244
245   copy_src_reg(&o->Src[0].Register, &i->Src[0].Register);
246   copy_src_reg(&o->Src[1].Register, &i->Src[1].Register);
247   copy_src_reg(&o->Src[2].Register, &i->Src[2].Register);
248}
249
250static void
251copy_token(union i915_full_token *o, union tgsi_full_token *i)
252{
253   if (i->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
254      memcpy(o, i, sizeof(*o));
255   else
256      copy_instruction(&o->FullInstruction, &i->FullInstruction);
257}
258
259static void
260liveness_mark_written(struct i915_optimize_context *ctx,
261                      struct i915_full_dst_register *dst_reg, int pos)
262{
263   int dst_reg_index;
264   if (dst_reg->Register.File == TGSI_FILE_TEMPORARY) {
265      dst_reg_index = dst_reg->Register.Index;
266      assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
267      /* dead -> live transition */
268      if (ctx->first_write[dst_reg_index] != -1)
269         ctx->first_write[dst_reg_index] = pos;
270   }
271}
272
273static void
274liveness_mark_read(struct i915_optimize_context *ctx,
275                   struct i915_full_src_register *src_reg, int pos)
276{
277   int src_reg_index;
278   if (src_reg->Register.File == TGSI_FILE_TEMPORARY) {
279      src_reg_index = src_reg->Register.Index;
280      assert(src_reg_index < TGSI_EXEC_NUM_TEMPS);
281      /* live -> dead transition */
282      if (ctx->last_read[src_reg_index] != -1)
283         ctx->last_read[src_reg_index] = pos;
284   }
285}
286
287static void
288liveness_analysis(struct i915_optimize_context *ctx,
289                  struct i915_token_list *tokens)
290{
291   struct i915_full_dst_register *dst_reg;
292   struct i915_full_src_register *src_reg;
293   union i915_full_token *current;
294   unsigned opcode;
295   int num_dst, num_src;
296   int i = 0;
297
298   for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
299      ctx->first_write[i] = -1;
300      ctx->last_read[i] = -1;
301   }
302
303   for (i = 0; i < tokens->NumTokens; i++) {
304      current = &tokens->Tokens[i];
305
306      if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
307         continue;
308
309      opcode = current->FullInstruction.Instruction.Opcode;
310      num_dst = op_num_dst(opcode);
311
312      switch (num_dst) {
313      case 1:
314         dst_reg = &current->FullInstruction.Dst[0];
315         liveness_mark_written(ctx, dst_reg, i);
316         FALLTHROUGH;
317      case 0:
318         break;
319      default:
320         debug_printf("Op %d has %d dst regs\n", opcode, num_dst);
321         break;
322      }
323   }
324
325   for (i = tokens->NumTokens - 1; i >= 0; i--) {
326      current = &tokens->Tokens[i];
327
328      if (current->Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
329         continue;
330
331      opcode = current->FullInstruction.Instruction.Opcode;
332      num_src = op_num_src(opcode);
333
334      switch (num_src) {
335      case 3:
336         src_reg = &current->FullInstruction.Src[2];
337         liveness_mark_read(ctx, src_reg, i);
338         FALLTHROUGH;
339      case 2:
340         src_reg = &current->FullInstruction.Src[1];
341         liveness_mark_read(ctx, src_reg, i);
342         FALLTHROUGH;
343      case 1:
344         src_reg = &current->FullInstruction.Src[0];
345         liveness_mark_read(ctx, src_reg, i);
346         FALLTHROUGH;
347      case 0:
348         break;
349      default:
350         debug_printf("Op %d has %d src regs\n", opcode, num_src);
351         break;
352      }
353   }
354}
355
356static int
357unused_from(struct i915_optimize_context *ctx,
358            struct i915_full_dst_register *dst_reg, int from)
359{
360   int dst_reg_index = dst_reg->Register.Index;
361   assert(dst_reg_index < TGSI_EXEC_NUM_TEMPS);
362   return (from >= ctx->last_read[dst_reg_index]);
363}
364
365/* Returns a mask with the components used for a texture access instruction */
366static unsigned
367i915_tex_mask(union i915_full_token *instr)
368{
369   return i915_coord_mask(instr->FullInstruction.Instruction.Opcode,
370                          instr->FullInstruction.Texture.Texture);
371}
372
373static bool
374target_is_texture2d(uint32_t tex)
375{
376   switch (tex) {
377   case TGSI_TEXTURE_2D:
378   case TGSI_TEXTURE_RECT:
379      return true;
380   default:
381      return false;
382   }
383}
384
385/*
386 * Optimize away useless indirect texture reads:
387 *    MOV TEMP[0].xy, IN[0].xyyy
388 *    TEX TEMP[1], TEMP[0], SAMP[0], 2D
389 * into:
390 *    TEX TEMP[1], IN[0], SAMP[0], 2D
391 *
392 * note: this only seems to work on 2D/RECT textures, but not SHAADOW2D/1D/..
393 */
394static void
395i915_fpc_optimize_mov_before_tex(struct i915_optimize_context *ctx,
396                                 struct i915_token_list *tokens, int index)
397{
398   union i915_full_token *current = &tokens->Tokens[index - 1];
399   union i915_full_token *next = &tokens->Tokens[index];
400
401   if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
402       next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
403       current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
404       op_is_texture(next->FullInstruction.Instruction.Opcode) &&
405       target_is_texture2d(next->FullInstruction.Texture.Texture) &&
406       same_src_dst_reg(&next->FullInstruction.Src[0],
407                        &current->FullInstruction.Dst[0]) &&
408       is_unswizzled(&current->FullInstruction.Src[0], i915_tex_mask(next)) &&
409       unused_from(ctx, &current->FullInstruction.Dst[0], index)) {
410      memcpy(&next->FullInstruction.Src[0], &current->FullInstruction.Src[0],
411             sizeof(struct i915_src_register));
412      current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
413   }
414}
415
416/*
417 * Optimize away things like:
418 *    MOV TEMP[0].xy, TEMP[1].xyyy (first write for TEMP[0])
419 *    MOV TEMP[0].w, TEMP[1].wwww (last write for TEMP[0])
420 * into:
421 *    NOP
422 *    MOV OUT[0].xyw, TEMP[1].xyww
423 */
424static void
425i915_fpc_optimize_mov_after_mov(union i915_full_token *current,
426                                union i915_full_token *next)
427{
428   struct i915_full_src_register *src_reg1, *src_reg2;
429   struct i915_full_dst_register *dst_reg1, *dst_reg2;
430   unsigned swizzle_x, swizzle_y, swizzle_z, swizzle_w;
431
432   if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
433       next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
434       current->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
435       next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
436       current->FullInstruction.Instruction.Saturate ==
437          next->FullInstruction.Instruction.Saturate &&
438       same_dst_reg(&next->FullInstruction.Dst[0],
439                    &current->FullInstruction.Dst[0]) &&
440       same_src_reg(&next->FullInstruction.Src[0],
441                    &current->FullInstruction.Src[0]) &&
442       !same_src_dst_reg(&current->FullInstruction.Src[0],
443                         &current->FullInstruction.Dst[0])) {
444      src_reg1 = &current->FullInstruction.Src[0];
445      dst_reg1 = &current->FullInstruction.Dst[0];
446      src_reg2 = &next->FullInstruction.Src[0];
447      dst_reg2 = &next->FullInstruction.Dst[0];
448
449      /* Start with swizzles from the first mov */
450      swizzle_x = src_reg1->Register.SwizzleX;
451      swizzle_y = src_reg1->Register.SwizzleY;
452      swizzle_z = src_reg1->Register.SwizzleZ;
453      swizzle_w = src_reg1->Register.SwizzleW;
454
455      /* Pile the second mov on top */
456      if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_X)
457         swizzle_x = src_reg2->Register.SwizzleX;
458      if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Y)
459         swizzle_y = src_reg2->Register.SwizzleY;
460      if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_Z)
461         swizzle_z = src_reg2->Register.SwizzleZ;
462      if (dst_reg2->Register.WriteMask & TGSI_WRITEMASK_W)
463         swizzle_w = src_reg2->Register.SwizzleW;
464
465      dst_reg2->Register.WriteMask |= dst_reg1->Register.WriteMask;
466      src_reg2->Register.SwizzleX = swizzle_x;
467      src_reg2->Register.SwizzleY = swizzle_y;
468      src_reg2->Register.SwizzleZ = swizzle_z;
469      src_reg2->Register.SwizzleW = swizzle_w;
470
471      current->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
472
473      return;
474   }
475}
476
477/*
478 * Optimize away things like:
479 *    MUL OUT[0].xyz, TEMP[1], TEMP[2]
480 *    MOV OUT[0].w, TEMP[2]
481 * into:
482 *    MUL OUT[0].xyzw, TEMP[1].xyz1, TEMP[2]
483 * This is useful for optimizing texenv.
484 */
485static void
486i915_fpc_optimize_mov_after_alu(union i915_full_token *current,
487                                union i915_full_token *next)
488{
489   if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
490       next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
491       op_commutes(current->FullInstruction.Instruction.Opcode) &&
492       current->FullInstruction.Instruction.Saturate ==
493          next->FullInstruction.Instruction.Saturate &&
494       next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
495       same_dst_reg(&next->FullInstruction.Dst[0],
496                    &current->FullInstruction.Dst[0]) &&
497       same_src_reg(&next->FullInstruction.Src[0],
498                    &current->FullInstruction.Src[1]) &&
499       !same_src_dst_reg(&next->FullInstruction.Src[0],
500                         &current->FullInstruction.Dst[0]) &&
501       is_unswizzled(&current->FullInstruction.Src[0],
502                     current->FullInstruction.Dst[0].Register.WriteMask) &&
503       is_unswizzled(&current->FullInstruction.Src[1],
504                     current->FullInstruction.Dst[0].Register.WriteMask) &&
505       is_unswizzled(&next->FullInstruction.Src[0],
506                     next->FullInstruction.Dst[0].Register.WriteMask)) {
507      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
508
509      set_neutral_element_swizzle(&current->FullInstruction.Src[1], 0, 0);
510      set_neutral_element_swizzle(
511         &current->FullInstruction.Src[0],
512         next->FullInstruction.Dst[0].Register.WriteMask,
513         op_neutral_element(current->FullInstruction.Instruction.Opcode));
514
515      current->FullInstruction.Dst[0].Register.WriteMask =
516         current->FullInstruction.Dst[0].Register.WriteMask |
517         next->FullInstruction.Dst[0].Register.WriteMask;
518      return;
519   }
520
521   if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
522       next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
523       op_commutes(current->FullInstruction.Instruction.Opcode) &&
524       current->FullInstruction.Instruction.Saturate ==
525          next->FullInstruction.Instruction.Saturate &&
526       next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
527       same_dst_reg(&next->FullInstruction.Dst[0],
528                    &current->FullInstruction.Dst[0]) &&
529       same_src_reg(&next->FullInstruction.Src[0],
530                    &current->FullInstruction.Src[0]) &&
531       !same_src_dst_reg(&next->FullInstruction.Src[0],
532                         &current->FullInstruction.Dst[0]) &&
533       is_unswizzled(&current->FullInstruction.Src[0],
534                     current->FullInstruction.Dst[0].Register.WriteMask) &&
535       is_unswizzled(&current->FullInstruction.Src[1],
536                     current->FullInstruction.Dst[0].Register.WriteMask) &&
537       is_unswizzled(&next->FullInstruction.Src[0],
538                     next->FullInstruction.Dst[0].Register.WriteMask)) {
539      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
540
541      set_neutral_element_swizzle(&current->FullInstruction.Src[0], 0, 0);
542      set_neutral_element_swizzle(
543         &current->FullInstruction.Src[1],
544         next->FullInstruction.Dst[0].Register.WriteMask,
545         op_neutral_element(current->FullInstruction.Instruction.Opcode));
546
547      current->FullInstruction.Dst[0].Register.WriteMask =
548         current->FullInstruction.Dst[0].Register.WriteMask |
549         next->FullInstruction.Dst[0].Register.WriteMask;
550      return;
551   }
552}
553
554/*
555 * Optimize away things like:
556 *    MOV TEMP[0].xyz TEMP[0].xyzx
557 * into:
558 *    NOP
559 */
560static bool
561i915_fpc_useless_mov(union tgsi_full_token *tgsi_current)
562{
563   union i915_full_token current;
564   copy_token(&current, tgsi_current);
565   if (current.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
566       current.FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
567       op_has_dst(current.FullInstruction.Instruction.Opcode) &&
568       !current.FullInstruction.Instruction.Saturate &&
569       current.FullInstruction.Src[0].Register.Absolute == 0 &&
570       current.FullInstruction.Src[0].Register.Negate == 0 &&
571       is_unswizzled(&current.FullInstruction.Src[0],
572                     current.FullInstruction.Dst[0].Register.WriteMask) &&
573       same_src_dst_reg(&current.FullInstruction.Src[0],
574                        &current.FullInstruction.Dst[0])) {
575      return true;
576   }
577   return false;
578}
579
580/*
581 * Optimize away things like:
582 *    *** TEMP[0], TEMP[1], TEMP[2]
583 *    MOV OUT[0] TEMP[0]
584 * into:
585 *    *** OUT[0], TEMP[1], TEMP[2]
586 */
587static void
588i915_fpc_optimize_useless_mov_after_inst(struct i915_optimize_context *ctx,
589                                         struct i915_token_list *tokens,
590                                         int index)
591{
592   union i915_full_token *current = &tokens->Tokens[index - 1];
593   union i915_full_token *next = &tokens->Tokens[index];
594
595   // &out_tokens->Tokens[i-1], &out_tokens->Tokens[i]);
596   if (current->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
597       next->Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION &&
598       next->FullInstruction.Instruction.Opcode == TGSI_OPCODE_MOV &&
599       op_has_dst(current->FullInstruction.Instruction.Opcode) &&
600       !next->FullInstruction.Instruction.Saturate &&
601       next->FullInstruction.Src[0].Register.Absolute == 0 &&
602       next->FullInstruction.Src[0].Register.Negate == 0 &&
603       unused_from(ctx, &current->FullInstruction.Dst[0], index) &&
604       current->FullInstruction.Dst[0].Register.WriteMask ==
605          TGSI_WRITEMASK_XYZW &&
606       is_unswizzled(&next->FullInstruction.Src[0],
607                     next->FullInstruction.Dst[0].Register.WriteMask) &&
608       current->FullInstruction.Dst[0].Register.WriteMask ==
609          next->FullInstruction.Dst[0].Register.WriteMask &&
610       same_src_dst_reg(&next->FullInstruction.Src[0],
611                        &current->FullInstruction.Dst[0])) {
612      next->FullInstruction.Instruction.Opcode = TGSI_OPCODE_NOP;
613
614      current->FullInstruction.Dst[0] = next->FullInstruction.Dst[0];
615      return;
616   }
617}
618
619struct i915_token_list *
620i915_optimize(const struct tgsi_token *tokens)
621{
622   struct i915_token_list *out_tokens = MALLOC(sizeof(struct i915_token_list));
623   struct tgsi_parse_context parse;
624   struct i915_optimize_context *ctx;
625   int i = 0;
626
627   ctx = malloc(sizeof(*ctx));
628
629   out_tokens->NumTokens = 0;
630
631   /* Count the tokens */
632   tgsi_parse_init(&parse, tokens);
633   while (!tgsi_parse_end_of_tokens(&parse)) {
634      tgsi_parse_token(&parse);
635      out_tokens->NumTokens++;
636   }
637   tgsi_parse_free(&parse);
638
639   /* Allocate our tokens */
640   out_tokens->Tokens =
641      MALLOC(sizeof(union i915_full_token) * out_tokens->NumTokens);
642
643   tgsi_parse_init(&parse, tokens);
644   while (!tgsi_parse_end_of_tokens(&parse)) {
645      tgsi_parse_token(&parse);
646
647      if (i915_fpc_useless_mov(&parse.FullToken)) {
648         out_tokens->NumTokens--;
649         continue;
650      }
651
652      copy_token(&out_tokens->Tokens[i], &parse.FullToken);
653
654      i++;
655   }
656   tgsi_parse_free(&parse);
657
658   liveness_analysis(ctx, out_tokens);
659
660   i = 1;
661   while (i < out_tokens->NumTokens) {
662      i915_fpc_optimize_useless_mov_after_inst(ctx, out_tokens, i);
663      i915_fpc_optimize_mov_after_alu(&out_tokens->Tokens[i - 1],
664                                      &out_tokens->Tokens[i]);
665      i915_fpc_optimize_mov_after_mov(&out_tokens->Tokens[i - 1],
666                                      &out_tokens->Tokens[i]);
667      i915_fpc_optimize_mov_before_tex(ctx, out_tokens, i);
668      i++;
669   }
670
671   free(ctx);
672
673   return out_tokens;
674}
675
676void
677i915_optimize_free(struct i915_token_list *tokens)
678{
679   free(tokens->Tokens);
680   free(tokens);
681}
682