1/*
2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Jonathan Marek <jonathan@marek.ca>
25 */
26
27#include "ir2_private.h"
28
29static bool
30is_mov(struct ir2_instr *instr)
31{
32   return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
33          instr->src_count == 1;
34}
35
36static void
37src_combine(struct ir2_src *src, struct ir2_src b)
38{
39   src->num = b.num;
40   src->type = b.type;
41   src->swizzle = swiz_merge(b.swizzle, src->swizzle);
42   if (!src->abs) /* if we have abs we don't care about previous negate */
43      src->negate ^= b.negate;
44   src->abs |= b.abs;
45}
46
47/* cp_src: replace src regs when they refer to a mov instruction
48 * example:
49 *	ALU:      MAXv    R7 = C7, C7
50 *	ALU:      MULADDv R7 = R7, R10, R0.xxxx
51 * becomes:
52 *	ALU:      MULADDv R7 = C7, R10, R0.xxxx
53 */
54void
55cp_src(struct ir2_context *ctx)
56{
57   struct ir2_instr *p;
58
59   ir2_foreach_instr (instr, ctx) {
60      ir2_foreach_src (src, instr) {
61         /* loop to replace recursively */
62         do {
63            if (src->type != IR2_SRC_SSA)
64               break;
65
66            p = &ctx->instr[src->num];
67            /* don't work across blocks to avoid possible issues */
68            if (p->block_idx != instr->block_idx)
69               break;
70
71            if (!is_mov(p))
72               break;
73
74            if (p->alu.saturate)
75               break;
76
77            /* cant apply abs to const src, const src only for alu */
78            if (p->src[0].type == IR2_SRC_CONST &&
79                (src->abs || instr->type != IR2_ALU))
80               break;
81
82            src_combine(src, p->src[0]);
83         } while (1);
84      }
85   }
86}
87
88/* cp_export: replace mov to export when possible
89 * in the cp_src pass we bypass any mov instructions related
90 * to the src registers, but for exports for need something different
91 * example:
92 *	ALU:      MAXv    R3.x___ = C9.x???, C9.x???
93 *	ALU:      MAXv    R3._y__ = R0.?x??, C8.?x??
94 *	ALU:      MAXv    export0 = R3.yyyx, R3.yyyx
95 * becomes:
96 *	ALU:      MAXv    export0.___w = C9.???x, C9.???x
97 *	ALU:      MAXv    export0.xyz_ = R0.xxx?, C8.xxx?
98 *
99 */
100void
101cp_export(struct ir2_context *ctx)
102{
103   struct ir2_instr *c[4], *ins[4];
104   struct ir2_src *src;
105   struct ir2_reg *reg;
106   unsigned ncomp;
107
108   ir2_foreach_instr (instr, ctx) {
109      if (!is_export(instr)) /* TODO */
110         continue;
111
112      if (!is_mov(instr))
113         continue;
114
115      src = &instr->src[0];
116
117      if (src->negate || src->abs) /* TODO handle these cases */
118         continue;
119
120      if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
121         continue;
122
123      reg = get_reg_src(ctx, src);
124      ncomp = dst_ncomp(instr);
125
126      unsigned reswiz[4] = {};
127      unsigned num_instr = 0;
128
129      /* fill array c with pointers to instrs that write each component */
130      if (src->type == IR2_SRC_SSA) {
131         struct ir2_instr *instr = &ctx->instr[src->num];
132
133         if (instr->type != IR2_ALU)
134            continue;
135
136         for (int i = 0; i < ncomp; i++)
137            c[i] = instr;
138
139         ins[num_instr++] = instr;
140         reswiz[0] = src->swizzle;
141      } else {
142         bool ok = true;
143         unsigned write_mask = 0;
144
145         ir2_foreach_instr (instr, ctx) {
146            if (instr->is_ssa || instr->reg != reg)
147               continue;
148
149            /* set by non-ALU */
150            if (instr->type != IR2_ALU) {
151               ok = false;
152               break;
153            }
154
155            /* component written more than once */
156            if (write_mask & instr->alu.write_mask) {
157               ok = false;
158               break;
159            }
160
161            write_mask |= instr->alu.write_mask;
162
163            /* src pointers for components */
164            for (int i = 0, j = 0; i < 4; i++) {
165               unsigned k = swiz_get(src->swizzle, i);
166               if (instr->alu.write_mask & 1 << k) {
167                  c[i] = instr;
168
169                  /* reswiz = compressed src->swizzle */
170                  unsigned x = 0;
171                  for (int i = 0; i < k; i++)
172                     x += !!(instr->alu.write_mask & 1 << i);
173
174                  assert(src->swizzle || x == j);
175                  reswiz[num_instr] |= swiz_set(x, j++);
176               }
177            }
178            ins[num_instr++] = instr;
179         }
180         if (!ok)
181            continue;
182      }
183
184      bool redirect = true;
185
186      /* must all be in same block */
187      for (int i = 0; i < ncomp; i++)
188         redirect &= (c[i]->block_idx == instr->block_idx);
189
190      /* no other instr using the value */
191      ir2_foreach_instr (p, ctx) {
192         if (p == instr)
193            continue;
194         ir2_foreach_src (src, p)
195            redirect &= reg != get_reg_src(ctx, src);
196      }
197
198      if (!redirect)
199         continue;
200
201      /* redirect the instructions writing to the register */
202      for (int i = 0; i < num_instr; i++) {
203         struct ir2_instr *p = ins[i];
204
205         p->alu.export = instr->alu.export;
206         p->alu.write_mask = 0;
207         p->is_ssa = true;
208         p->ssa.ncomp = 0;
209         memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
210         p->alu.saturate |= instr->alu.saturate;
211
212         switch (p->alu.vector_opc) {
213         case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
214         case DOT2ADDv:
215         case DOT3v:
216         case DOT4v:
217         case CUBEv:
218            continue;
219         default:
220            break;
221         }
222         ir2_foreach_src (s, p)
223            swiz_merge_p(&s->swizzle, reswiz[i]);
224      }
225
226      for (int i = 0; i < ncomp; i++) {
227         c[i]->alu.write_mask |= (1 << i);
228         c[i]->ssa.ncomp++;
229      }
230      instr->type = IR2_NONE;
231      instr->need_emit = false;
232   }
233}
234