1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2011 Christoph Bumiller
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
12bf215546Sopenharmony_ci * all copies or substantial portions of the Software.
13bf215546Sopenharmony_ci *
14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
21bf215546Sopenharmony_ci */
22bf215546Sopenharmony_ci
23bf215546Sopenharmony_ci#include "nv50_ir.h"
24bf215546Sopenharmony_ci#include "nv50_ir_target.h"
25bf215546Sopenharmony_ci#include "nv50_ir_build_util.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ciextern "C" {
28bf215546Sopenharmony_ci#include "util/u_math.h"
29bf215546Sopenharmony_ci}
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_cinamespace nv50_ir {
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_cibool
34bf215546Sopenharmony_ciInstruction::isNop() const
35bf215546Sopenharmony_ci{
36bf215546Sopenharmony_ci   if (op == OP_PHI || op == OP_SPLIT || op == OP_MERGE || op == OP_CONSTRAINT)
37bf215546Sopenharmony_ci      return true;
38bf215546Sopenharmony_ci   if (terminator || join) // XXX: should terminator imply flow ?
39bf215546Sopenharmony_ci      return false;
40bf215546Sopenharmony_ci   if (op == OP_ATOM)
41bf215546Sopenharmony_ci      return false;
42bf215546Sopenharmony_ci   if (!fixed && op == OP_NOP)
43bf215546Sopenharmony_ci      return true;
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ci   if (defExists(0) && def(0).rep()->reg.data.id < 0) {
46bf215546Sopenharmony_ci      for (int d = 1; defExists(d); ++d)
47bf215546Sopenharmony_ci         if (def(d).rep()->reg.data.id >= 0)
48bf215546Sopenharmony_ci            WARN("part of vector result is unused !\n");
49bf215546Sopenharmony_ci      return true;
50bf215546Sopenharmony_ci   }
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci   if (op == OP_MOV || op == OP_UNION) {
53bf215546Sopenharmony_ci      if (!getDef(0)->equals(getSrc(0)))
54bf215546Sopenharmony_ci         return false;
55bf215546Sopenharmony_ci      if (op == OP_UNION)
56bf215546Sopenharmony_ci         if (!getDef(0)->equals(getSrc(1)))
57bf215546Sopenharmony_ci            return false;
58bf215546Sopenharmony_ci      return true;
59bf215546Sopenharmony_ci   }
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_ci   return false;
62bf215546Sopenharmony_ci}
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_cibool Instruction::isDead() const
65bf215546Sopenharmony_ci{
66bf215546Sopenharmony_ci   if (op == OP_STORE ||
67bf215546Sopenharmony_ci       op == OP_EXPORT ||
68bf215546Sopenharmony_ci       op == OP_ATOM ||
69bf215546Sopenharmony_ci       op == OP_SUSTB || op == OP_SUSTP || op == OP_SUREDP || op == OP_SUREDB ||
70bf215546Sopenharmony_ci       op == OP_WRSV)
71bf215546Sopenharmony_ci      return false;
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci   for (int d = 0; defExists(d); ++d)
74bf215546Sopenharmony_ci      if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0)
75bf215546Sopenharmony_ci         return false;
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   if (terminator || asFlow())
78bf215546Sopenharmony_ci      return false;
79bf215546Sopenharmony_ci   if (fixed)
80bf215546Sopenharmony_ci      return false;
81bf215546Sopenharmony_ci
82bf215546Sopenharmony_ci   return true;
83bf215546Sopenharmony_ci};
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci// =============================================================================
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ciclass CopyPropagation : public Pass
88bf215546Sopenharmony_ci{
89bf215546Sopenharmony_ciprivate:
90bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
91bf215546Sopenharmony_ci};
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci// Propagate all MOVs forward to make subsequent optimization easier, except if
94bf215546Sopenharmony_ci// the sources stem from a phi, in which case we don't want to mess up potential
95bf215546Sopenharmony_ci// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def.
96bf215546Sopenharmony_cibool
97bf215546Sopenharmony_ciCopyPropagation::visit(BasicBlock *bb)
98bf215546Sopenharmony_ci{
99bf215546Sopenharmony_ci   Instruction *mov, *si, *next;
100bf215546Sopenharmony_ci
101bf215546Sopenharmony_ci   for (mov = bb->getEntry(); mov; mov = next) {
102bf215546Sopenharmony_ci      next = mov->next;
103bf215546Sopenharmony_ci      if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue())
104bf215546Sopenharmony_ci         continue;
105bf215546Sopenharmony_ci      if (mov->getPredicate())
106bf215546Sopenharmony_ci         continue;
107bf215546Sopenharmony_ci      if (mov->def(0).getFile() != mov->src(0).getFile())
108bf215546Sopenharmony_ci         continue;
109bf215546Sopenharmony_ci      si = mov->getSrc(0)->getInsn();
110bf215546Sopenharmony_ci      if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) {
111bf215546Sopenharmony_ci         // propagate
112bf215546Sopenharmony_ci         mov->def(0).replace(mov->getSrc(0), false);
113bf215546Sopenharmony_ci         delete_Instruction(prog, mov);
114bf215546Sopenharmony_ci      }
115bf215546Sopenharmony_ci   }
116bf215546Sopenharmony_ci   return true;
117bf215546Sopenharmony_ci}
118bf215546Sopenharmony_ci
119bf215546Sopenharmony_ci// =============================================================================
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ciclass MergeSplits : public Pass
122bf215546Sopenharmony_ci{
123bf215546Sopenharmony_ciprivate:
124bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
125bf215546Sopenharmony_ci};
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_ci// For SPLIT / MERGE pairs that operate on the same registers, replace the
128bf215546Sopenharmony_ci// post-merge def with the SPLIT's source.
129bf215546Sopenharmony_cibool
130bf215546Sopenharmony_ciMergeSplits::visit(BasicBlock *bb)
131bf215546Sopenharmony_ci{
132bf215546Sopenharmony_ci   Instruction *i, *next, *si;
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   for (i = bb->getEntry(); i; i = next) {
135bf215546Sopenharmony_ci      next = i->next;
136bf215546Sopenharmony_ci      if (i->op != OP_MERGE || typeSizeof(i->dType) != 8)
137bf215546Sopenharmony_ci         continue;
138bf215546Sopenharmony_ci      si = i->getSrc(0)->getInsn();
139bf215546Sopenharmony_ci      if (si->op != OP_SPLIT || si != i->getSrc(1)->getInsn())
140bf215546Sopenharmony_ci         continue;
141bf215546Sopenharmony_ci      i->def(0).replace(si->getSrc(0), false);
142bf215546Sopenharmony_ci      delete_Instruction(prog, i);
143bf215546Sopenharmony_ci   }
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci   return true;
146bf215546Sopenharmony_ci}
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci// =============================================================================
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ciclass LoadPropagation : public Pass
151bf215546Sopenharmony_ci{
152bf215546Sopenharmony_ciprivate:
153bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
154bf215546Sopenharmony_ci
155bf215546Sopenharmony_ci   void checkSwapSrc01(Instruction *);
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   bool isCSpaceLoad(Instruction *);
158bf215546Sopenharmony_ci   bool isImmdLoad(Instruction *);
159bf215546Sopenharmony_ci   bool isAttribOrSharedLoad(Instruction *);
160bf215546Sopenharmony_ci};
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_cibool
163bf215546Sopenharmony_ciLoadPropagation::isCSpaceLoad(Instruction *ld)
164bf215546Sopenharmony_ci{
165bf215546Sopenharmony_ci   return ld && ld->op == OP_LOAD && ld->src(0).getFile() == FILE_MEMORY_CONST;
166bf215546Sopenharmony_ci}
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_cibool
169bf215546Sopenharmony_ciLoadPropagation::isImmdLoad(Instruction *ld)
170bf215546Sopenharmony_ci{
171bf215546Sopenharmony_ci   if (!ld || (ld->op != OP_MOV) ||
172bf215546Sopenharmony_ci       ((typeSizeof(ld->dType) != 4) && (typeSizeof(ld->dType) != 8)))
173bf215546Sopenharmony_ci      return false;
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   // A 0 can be replaced with a register, so it doesn't count as an immediate.
176bf215546Sopenharmony_ci   ImmediateValue val;
177bf215546Sopenharmony_ci   return ld->src(0).getImmediate(val) && !val.isInteger(0);
178bf215546Sopenharmony_ci}
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_cibool
181bf215546Sopenharmony_ciLoadPropagation::isAttribOrSharedLoad(Instruction *ld)
182bf215546Sopenharmony_ci{
183bf215546Sopenharmony_ci   return ld &&
184bf215546Sopenharmony_ci      (ld->op == OP_VFETCH ||
185bf215546Sopenharmony_ci       (ld->op == OP_LOAD &&
186bf215546Sopenharmony_ci        (ld->src(0).getFile() == FILE_SHADER_INPUT ||
187bf215546Sopenharmony_ci         ld->src(0).getFile() == FILE_MEMORY_SHARED)));
188bf215546Sopenharmony_ci}
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_civoid
191bf215546Sopenharmony_ciLoadPropagation::checkSwapSrc01(Instruction *insn)
192bf215546Sopenharmony_ci{
193bf215546Sopenharmony_ci   const Target *targ = prog->getTarget();
194bf215546Sopenharmony_ci   if (!targ->getOpInfo(insn).commutative) {
195bf215546Sopenharmony_ci      if (insn->op != OP_SET && insn->op != OP_SLCT &&
196bf215546Sopenharmony_ci          insn->op != OP_SUB && insn->op != OP_XMAD)
197bf215546Sopenharmony_ci         return;
198bf215546Sopenharmony_ci      // XMAD is only commutative if both the CBCC and MRG flags are not set.
199bf215546Sopenharmony_ci      if (insn->op == OP_XMAD &&
200bf215546Sopenharmony_ci          (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == NV50_IR_SUBOP_XMAD_CBCC)
201bf215546Sopenharmony_ci         return;
202bf215546Sopenharmony_ci      if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG))
203bf215546Sopenharmony_ci         return;
204bf215546Sopenharmony_ci   }
205bf215546Sopenharmony_ci   if (insn->src(1).getFile() != FILE_GPR)
206bf215546Sopenharmony_ci      return;
207bf215546Sopenharmony_ci   // This is the special OP_SET used for alphatesting, we can't reverse its
208bf215546Sopenharmony_ci   // arguments as that will confuse the fixup code.
209bf215546Sopenharmony_ci   if (insn->op == OP_SET && insn->subOp)
210bf215546Sopenharmony_ci      return;
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci   Instruction *i0 = insn->getSrc(0)->getInsn();
213bf215546Sopenharmony_ci   Instruction *i1 = insn->getSrc(1)->getInsn();
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   // Swap sources to inline the less frequently used source. That way,
216bf215546Sopenharmony_ci   // optimistically, it will eventually be able to remove the instruction.
217bf215546Sopenharmony_ci   int i0refs = insn->getSrc(0)->refCount();
218bf215546Sopenharmony_ci   int i1refs = insn->getSrc(1)->refCount();
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci   if ((isCSpaceLoad(i0) || isImmdLoad(i0)) && targ->insnCanLoad(insn, 1, i0)) {
221bf215546Sopenharmony_ci      if ((!isImmdLoad(i1) && !isCSpaceLoad(i1)) ||
222bf215546Sopenharmony_ci          !targ->insnCanLoad(insn, 1, i1) ||
223bf215546Sopenharmony_ci          i0refs < i1refs)
224bf215546Sopenharmony_ci         insn->swapSources(0, 1);
225bf215546Sopenharmony_ci      else
226bf215546Sopenharmony_ci         return;
227bf215546Sopenharmony_ci   } else
228bf215546Sopenharmony_ci   if (isAttribOrSharedLoad(i1)) {
229bf215546Sopenharmony_ci      if (!isAttribOrSharedLoad(i0))
230bf215546Sopenharmony_ci         insn->swapSources(0, 1);
231bf215546Sopenharmony_ci      else
232bf215546Sopenharmony_ci         return;
233bf215546Sopenharmony_ci   } else {
234bf215546Sopenharmony_ci      return;
235bf215546Sopenharmony_ci   }
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci   if (insn->op == OP_SET || insn->op == OP_SET_AND ||
238bf215546Sopenharmony_ci       insn->op == OP_SET_OR || insn->op == OP_SET_XOR)
239bf215546Sopenharmony_ci      insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond);
240bf215546Sopenharmony_ci   else
241bf215546Sopenharmony_ci   if (insn->op == OP_SLCT)
242bf215546Sopenharmony_ci      insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond);
243bf215546Sopenharmony_ci   else
244bf215546Sopenharmony_ci   if (insn->op == OP_SUB) {
245bf215546Sopenharmony_ci      insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
246bf215546Sopenharmony_ci      insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
247bf215546Sopenharmony_ci   } else
248bf215546Sopenharmony_ci   if (insn->op == OP_XMAD) {
249bf215546Sopenharmony_ci      // swap h1 flags
250bf215546Sopenharmony_ci      uint16_t h1 = (insn->subOp >> 1 & NV50_IR_SUBOP_XMAD_H1(0)) |
251bf215546Sopenharmony_ci                    (insn->subOp << 1 & NV50_IR_SUBOP_XMAD_H1(1));
252bf215546Sopenharmony_ci      insn->subOp = (insn->subOp & ~NV50_IR_SUBOP_XMAD_H1_MASK) | h1;
253bf215546Sopenharmony_ci   }
254bf215546Sopenharmony_ci}
255bf215546Sopenharmony_ci
256bf215546Sopenharmony_cibool
257bf215546Sopenharmony_ciLoadPropagation::visit(BasicBlock *bb)
258bf215546Sopenharmony_ci{
259bf215546Sopenharmony_ci   const Target *targ = prog->getTarget();
260bf215546Sopenharmony_ci   Instruction *next;
261bf215546Sopenharmony_ci
262bf215546Sopenharmony_ci   for (Instruction *i = bb->getEntry(); i; i = next) {
263bf215546Sopenharmony_ci      next = i->next;
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci      if (i->op == OP_CALL) // calls have args as sources, they must be in regs
266bf215546Sopenharmony_ci         continue;
267bf215546Sopenharmony_ci
268bf215546Sopenharmony_ci      if (i->op == OP_PFETCH) // pfetch expects arg1 to be a reg
269bf215546Sopenharmony_ci         continue;
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci      if (i->srcExists(1))
272bf215546Sopenharmony_ci         checkSwapSrc01(i);
273bf215546Sopenharmony_ci
274bf215546Sopenharmony_ci      for (int s = 0; i->srcExists(s); ++s) {
275bf215546Sopenharmony_ci         Instruction *ld = i->getSrc(s)->getInsn();
276bf215546Sopenharmony_ci
277bf215546Sopenharmony_ci         if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV))
278bf215546Sopenharmony_ci            continue;
279bf215546Sopenharmony_ci         if (ld->op == OP_LOAD && ld->subOp == NV50_IR_SUBOP_LOAD_LOCKED)
280bf215546Sopenharmony_ci            continue;
281bf215546Sopenharmony_ci         if (!targ->insnCanLoad(i, s, ld))
282bf215546Sopenharmony_ci            continue;
283bf215546Sopenharmony_ci
284bf215546Sopenharmony_ci         // propagate !
285bf215546Sopenharmony_ci         i->setSrc(s, ld->getSrc(0));
286bf215546Sopenharmony_ci         if (ld->src(0).isIndirect(0))
287bf215546Sopenharmony_ci            i->setIndirect(s, 0, ld->getIndirect(0, 0));
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci         if (ld->getDef(0)->refCount() == 0)
290bf215546Sopenharmony_ci            delete_Instruction(prog, ld);
291bf215546Sopenharmony_ci      }
292bf215546Sopenharmony_ci   }
293bf215546Sopenharmony_ci   return true;
294bf215546Sopenharmony_ci}
295bf215546Sopenharmony_ci
296bf215546Sopenharmony_ci// =============================================================================
297bf215546Sopenharmony_ci
298bf215546Sopenharmony_ciclass IndirectPropagation : public Pass
299bf215546Sopenharmony_ci{
300bf215546Sopenharmony_ciprivate:
301bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
302bf215546Sopenharmony_ci
303bf215546Sopenharmony_ci   BuildUtil bld;
304bf215546Sopenharmony_ci};
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_cibool
307bf215546Sopenharmony_ciIndirectPropagation::visit(BasicBlock *bb)
308bf215546Sopenharmony_ci{
309bf215546Sopenharmony_ci   const Target *targ = prog->getTarget();
310bf215546Sopenharmony_ci   Instruction *next;
311bf215546Sopenharmony_ci
312bf215546Sopenharmony_ci   for (Instruction *i = bb->getEntry(); i; i = next) {
313bf215546Sopenharmony_ci      next = i->next;
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci      bld.setPosition(i, false);
316bf215546Sopenharmony_ci
317bf215546Sopenharmony_ci      for (int s = 0; i->srcExists(s); ++s) {
318bf215546Sopenharmony_ci         Instruction *insn;
319bf215546Sopenharmony_ci         ImmediateValue imm;
320bf215546Sopenharmony_ci         if (!i->src(s).isIndirect(0))
321bf215546Sopenharmony_ci            continue;
322bf215546Sopenharmony_ci         insn = i->getIndirect(s, 0)->getInsn();
323bf215546Sopenharmony_ci         if (!insn)
324bf215546Sopenharmony_ci            continue;
325bf215546Sopenharmony_ci         if (insn->op == OP_ADD && !isFloatType(insn->dType)) {
326bf215546Sopenharmony_ci            if (insn->src(0).getFile() != targ->nativeFile(FILE_ADDRESS) ||
327bf215546Sopenharmony_ci                !insn->src(1).getImmediate(imm) ||
328bf215546Sopenharmony_ci                !targ->insnCanLoadOffset(i, s, imm.reg.data.s32))
329bf215546Sopenharmony_ci               continue;
330bf215546Sopenharmony_ci            i->setIndirect(s, 0, insn->getSrc(0));
331bf215546Sopenharmony_ci            i->setSrc(s, cloneShallow(func, i->getSrc(s)));
332bf215546Sopenharmony_ci            i->src(s).get()->reg.data.offset += imm.reg.data.u32;
333bf215546Sopenharmony_ci         } else if (insn->op == OP_SUB && !isFloatType(insn->dType)) {
334bf215546Sopenharmony_ci            if (insn->src(0).getFile() != targ->nativeFile(FILE_ADDRESS) ||
335bf215546Sopenharmony_ci                !insn->src(1).getImmediate(imm) ||
336bf215546Sopenharmony_ci                !targ->insnCanLoadOffset(i, s, -imm.reg.data.s32))
337bf215546Sopenharmony_ci               continue;
338bf215546Sopenharmony_ci            i->setIndirect(s, 0, insn->getSrc(0));
339bf215546Sopenharmony_ci            i->setSrc(s, cloneShallow(func, i->getSrc(s)));
340bf215546Sopenharmony_ci            i->src(s).get()->reg.data.offset -= imm.reg.data.u32;
341bf215546Sopenharmony_ci         } else if (insn->op == OP_MOV) {
342bf215546Sopenharmony_ci            if (!insn->src(0).getImmediate(imm) ||
343bf215546Sopenharmony_ci                !targ->insnCanLoadOffset(i, s, imm.reg.data.s32))
344bf215546Sopenharmony_ci               continue;
345bf215546Sopenharmony_ci            i->setIndirect(s, 0, NULL);
346bf215546Sopenharmony_ci            i->setSrc(s, cloneShallow(func, i->getSrc(s)));
347bf215546Sopenharmony_ci            i->src(s).get()->reg.data.offset += imm.reg.data.u32;
348bf215546Sopenharmony_ci         } else if (insn->op == OP_SHLADD) {
349bf215546Sopenharmony_ci            if (!insn->src(2).getImmediate(imm) ||
350bf215546Sopenharmony_ci                !targ->insnCanLoadOffset(i, s, imm.reg.data.s32))
351bf215546Sopenharmony_ci               continue;
352bf215546Sopenharmony_ci            i->setIndirect(s, 0, bld.mkOp2v(
353bf215546Sopenharmony_ci               OP_SHL, TYPE_U32, bld.getSSA(), insn->getSrc(0), insn->getSrc(1)));
354bf215546Sopenharmony_ci            i->setSrc(s, cloneShallow(func, i->getSrc(s)));
355bf215546Sopenharmony_ci            i->src(s).get()->reg.data.offset += imm.reg.data.u32;
356bf215546Sopenharmony_ci         }
357bf215546Sopenharmony_ci      }
358bf215546Sopenharmony_ci   }
359bf215546Sopenharmony_ci   return true;
360bf215546Sopenharmony_ci}
361bf215546Sopenharmony_ci
362bf215546Sopenharmony_ci// =============================================================================
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ci// Evaluate constant expressions.
365bf215546Sopenharmony_ciclass ConstantFolding : public Pass
366bf215546Sopenharmony_ci{
367bf215546Sopenharmony_cipublic:
368bf215546Sopenharmony_ci   ConstantFolding() : foldCount(0) {}
369bf215546Sopenharmony_ci   bool foldAll(Program *);
370bf215546Sopenharmony_ci
371bf215546Sopenharmony_ciprivate:
372bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
373bf215546Sopenharmony_ci
374bf215546Sopenharmony_ci   void expr(Instruction *, ImmediateValue&, ImmediateValue&);
375bf215546Sopenharmony_ci   void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&);
376bf215546Sopenharmony_ci   /* true if i was deleted */
377bf215546Sopenharmony_ci   bool opnd(Instruction *i, ImmediateValue&, int s);
378bf215546Sopenharmony_ci   void opnd3(Instruction *, ImmediateValue&);
379bf215546Sopenharmony_ci
380bf215546Sopenharmony_ci   void unary(Instruction *, const ImmediateValue&);
381bf215546Sopenharmony_ci
382bf215546Sopenharmony_ci   void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&);
383bf215546Sopenharmony_ci
384bf215546Sopenharmony_ci   CmpInstruction *findOriginForTestWithZero(Value *);
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_ci   bool createMul(DataType ty, Value *def, Value *a, int64_t b, Value *c);
387bf215546Sopenharmony_ci
388bf215546Sopenharmony_ci   unsigned int foldCount;
389bf215546Sopenharmony_ci
390bf215546Sopenharmony_ci   BuildUtil bld;
391bf215546Sopenharmony_ci};
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci// TODO: remember generated immediates and only revisit these
394bf215546Sopenharmony_cibool
395bf215546Sopenharmony_ciConstantFolding::foldAll(Program *prog)
396bf215546Sopenharmony_ci{
397bf215546Sopenharmony_ci   unsigned int iterCount = 0;
398bf215546Sopenharmony_ci   do {
399bf215546Sopenharmony_ci      foldCount = 0;
400bf215546Sopenharmony_ci      if (!run(prog))
401bf215546Sopenharmony_ci         return false;
402bf215546Sopenharmony_ci   } while (foldCount && ++iterCount < 2);
403bf215546Sopenharmony_ci   return true;
404bf215546Sopenharmony_ci}
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_cibool
407bf215546Sopenharmony_ciConstantFolding::visit(BasicBlock *bb)
408bf215546Sopenharmony_ci{
409bf215546Sopenharmony_ci   Instruction *i, *next;
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_ci   for (i = bb->getEntry(); i; i = next) {
412bf215546Sopenharmony_ci      next = i->next;
413bf215546Sopenharmony_ci      if (i->op == OP_MOV || i->op == OP_CALL)
414bf215546Sopenharmony_ci         continue;
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci      ImmediateValue src0, src1, src2;
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci      if (i->srcExists(2) &&
419bf215546Sopenharmony_ci          i->src(0).getImmediate(src0) &&
420bf215546Sopenharmony_ci          i->src(1).getImmediate(src1) &&
421bf215546Sopenharmony_ci          i->src(2).getImmediate(src2)) {
422bf215546Sopenharmony_ci         expr(i, src0, src1, src2);
423bf215546Sopenharmony_ci      } else
424bf215546Sopenharmony_ci      if (i->srcExists(1) &&
425bf215546Sopenharmony_ci          i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) {
426bf215546Sopenharmony_ci         expr(i, src0, src1);
427bf215546Sopenharmony_ci      } else
428bf215546Sopenharmony_ci      if (i->srcExists(0) && i->src(0).getImmediate(src0)) {
429bf215546Sopenharmony_ci         if (opnd(i, src0, 0))
430bf215546Sopenharmony_ci            continue;
431bf215546Sopenharmony_ci      } else
432bf215546Sopenharmony_ci      if (i->srcExists(1) && i->src(1).getImmediate(src1)) {
433bf215546Sopenharmony_ci         if (opnd(i, src1, 1))
434bf215546Sopenharmony_ci            continue;
435bf215546Sopenharmony_ci      }
436bf215546Sopenharmony_ci      if (i->srcExists(2) && i->src(2).getImmediate(src2))
437bf215546Sopenharmony_ci         opnd3(i, src2);
438bf215546Sopenharmony_ci   }
439bf215546Sopenharmony_ci   return true;
440bf215546Sopenharmony_ci}
441bf215546Sopenharmony_ci
442bf215546Sopenharmony_ciCmpInstruction *
443bf215546Sopenharmony_ciConstantFolding::findOriginForTestWithZero(Value *value)
444bf215546Sopenharmony_ci{
445bf215546Sopenharmony_ci   if (!value)
446bf215546Sopenharmony_ci      return NULL;
447bf215546Sopenharmony_ci   Instruction *insn = value->getInsn();
448bf215546Sopenharmony_ci   if (!insn)
449bf215546Sopenharmony_ci      return NULL;
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   if (insn->asCmp() && insn->op != OP_SLCT)
452bf215546Sopenharmony_ci      return insn->asCmp();
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci   /* Sometimes mov's will sneak in as a result of other folding. This gets
455bf215546Sopenharmony_ci    * cleaned up later.
456bf215546Sopenharmony_ci    */
457bf215546Sopenharmony_ci   if (insn->op == OP_MOV)
458bf215546Sopenharmony_ci      return findOriginForTestWithZero(insn->getSrc(0));
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci   /* Deal with AND 1.0 here since nv50 can't fold into boolean float */
461bf215546Sopenharmony_ci   if (insn->op == OP_AND) {
462bf215546Sopenharmony_ci      int s = 0;
463bf215546Sopenharmony_ci      ImmediateValue imm;
464bf215546Sopenharmony_ci      if (!insn->src(s).getImmediate(imm)) {
465bf215546Sopenharmony_ci         s = 1;
466bf215546Sopenharmony_ci         if (!insn->src(s).getImmediate(imm))
467bf215546Sopenharmony_ci            return NULL;
468bf215546Sopenharmony_ci      }
469bf215546Sopenharmony_ci      if (imm.reg.data.f32 != 1.0f)
470bf215546Sopenharmony_ci         return NULL;
471bf215546Sopenharmony_ci      /* TODO: Come up with a way to handle the condition being inverted */
472bf215546Sopenharmony_ci      if (insn->src(!s).mod != Modifier(0))
473bf215546Sopenharmony_ci         return NULL;
474bf215546Sopenharmony_ci      return findOriginForTestWithZero(insn->getSrc(!s));
475bf215546Sopenharmony_ci   }
476bf215546Sopenharmony_ci
477bf215546Sopenharmony_ci   return NULL;
478bf215546Sopenharmony_ci}
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_civoid
481bf215546Sopenharmony_ciModifier::applyTo(ImmediateValue& imm) const
482bf215546Sopenharmony_ci{
483bf215546Sopenharmony_ci   if (!bits) // avoid failure if imm.reg.type is unhandled (e.g. b128)
484bf215546Sopenharmony_ci      return;
485bf215546Sopenharmony_ci   switch (imm.reg.type) {
486bf215546Sopenharmony_ci   case TYPE_F32:
487bf215546Sopenharmony_ci      if (bits & NV50_IR_MOD_ABS)
488bf215546Sopenharmony_ci         imm.reg.data.f32 = fabsf(imm.reg.data.f32);
489bf215546Sopenharmony_ci      if (bits & NV50_IR_MOD_NEG)
490bf215546Sopenharmony_ci         imm.reg.data.f32 = -imm.reg.data.f32;
491bf215546Sopenharmony_ci      if (bits & NV50_IR_MOD_SAT) {
492bf215546Sopenharmony_ci         if (imm.reg.data.f32 < 0.0f)
493bf215546Sopenharmony_ci            imm.reg.data.f32 = 0.0f;
494bf215546Sopenharmony_ci         else
495bf215546Sopenharmony_ci         if (imm.reg.data.f32 > 1.0f)
496bf215546Sopenharmony_ci            imm.reg.data.f32 = 1.0f;
497bf215546Sopenharmony_ci      }
498bf215546Sopenharmony_ci      assert(!(bits & NV50_IR_MOD_NOT));
499bf215546Sopenharmony_ci      break;
500bf215546Sopenharmony_ci
501bf215546Sopenharmony_ci   case TYPE_S8: // NOTE: will be extended
502bf215546Sopenharmony_ci   case TYPE_S16:
503bf215546Sopenharmony_ci   case TYPE_S32:
504bf215546Sopenharmony_ci   case TYPE_U8: // NOTE: treated as signed
505bf215546Sopenharmony_ci   case TYPE_U16:
506bf215546Sopenharmony_ci   case TYPE_U32:
507bf215546Sopenharmony_ci      if (bits & NV50_IR_MOD_ABS)
508bf215546Sopenharmony_ci         imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ?
509bf215546Sopenharmony_ci            imm.reg.data.s32 : -imm.reg.data.s32;
510bf215546Sopenharmony_ci      if (bits & NV50_IR_MOD_NEG)
511bf215546Sopenharmony_ci         imm.reg.data.s32 = -imm.reg.data.s32;
512bf215546Sopenharmony_ci      if (bits & NV50_IR_MOD_NOT)
513bf215546Sopenharmony_ci         imm.reg.data.s32 = ~imm.reg.data.s32;
514bf215546Sopenharmony_ci      break;
515bf215546Sopenharmony_ci
516bf215546Sopenharmony_ci   case TYPE_F64:
517bf215546Sopenharmony_ci      if (bits & NV50_IR_MOD_ABS)
518bf215546Sopenharmony_ci         imm.reg.data.f64 = fabs(imm.reg.data.f64);
519bf215546Sopenharmony_ci      if (bits & NV50_IR_MOD_NEG)
520bf215546Sopenharmony_ci         imm.reg.data.f64 = -imm.reg.data.f64;
521bf215546Sopenharmony_ci      if (bits & NV50_IR_MOD_SAT) {
522bf215546Sopenharmony_ci         if (imm.reg.data.f64 < 0.0)
523bf215546Sopenharmony_ci            imm.reg.data.f64 = 0.0;
524bf215546Sopenharmony_ci         else
525bf215546Sopenharmony_ci         if (imm.reg.data.f64 > 1.0)
526bf215546Sopenharmony_ci            imm.reg.data.f64 = 1.0;
527bf215546Sopenharmony_ci      }
528bf215546Sopenharmony_ci      assert(!(bits & NV50_IR_MOD_NOT));
529bf215546Sopenharmony_ci      break;
530bf215546Sopenharmony_ci
531bf215546Sopenharmony_ci   default:
532bf215546Sopenharmony_ci      assert(!"invalid/unhandled type");
533bf215546Sopenharmony_ci      imm.reg.data.u64 = 0;
534bf215546Sopenharmony_ci      break;
535bf215546Sopenharmony_ci   }
536bf215546Sopenharmony_ci}
537bf215546Sopenharmony_ci
538bf215546Sopenharmony_cioperation
539bf215546Sopenharmony_ciModifier::getOp() const
540bf215546Sopenharmony_ci{
541bf215546Sopenharmony_ci   switch (bits) {
542bf215546Sopenharmony_ci   case NV50_IR_MOD_ABS: return OP_ABS;
543bf215546Sopenharmony_ci   case NV50_IR_MOD_NEG: return OP_NEG;
544bf215546Sopenharmony_ci   case NV50_IR_MOD_SAT: return OP_SAT;
545bf215546Sopenharmony_ci   case NV50_IR_MOD_NOT: return OP_NOT;
546bf215546Sopenharmony_ci   case 0:
547bf215546Sopenharmony_ci      return OP_MOV;
548bf215546Sopenharmony_ci   default:
549bf215546Sopenharmony_ci      return OP_CVT;
550bf215546Sopenharmony_ci   }
551bf215546Sopenharmony_ci}
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_civoid
554bf215546Sopenharmony_ciConstantFolding::expr(Instruction *i,
555bf215546Sopenharmony_ci                      ImmediateValue &imm0, ImmediateValue &imm1)
556bf215546Sopenharmony_ci{
557bf215546Sopenharmony_ci   struct Storage *const a = &imm0.reg, *const b = &imm1.reg;
558bf215546Sopenharmony_ci   struct Storage res;
559bf215546Sopenharmony_ci   DataType type = i->dType;
560bf215546Sopenharmony_ci
561bf215546Sopenharmony_ci   memset(&res.data, 0, sizeof(res.data));
562bf215546Sopenharmony_ci
563bf215546Sopenharmony_ci   switch (i->op) {
564bf215546Sopenharmony_ci   case OP_SGXT: {
565bf215546Sopenharmony_ci      int bits = b->data.u32;
566bf215546Sopenharmony_ci      if (bits) {
567bf215546Sopenharmony_ci         uint32_t data = a->data.u32 & (0xffffffff >> (32 - bits));
568bf215546Sopenharmony_ci         if (bits < 32 && (data & (1 << (bits - 1))))
569bf215546Sopenharmony_ci            data = data - (1 << bits);
570bf215546Sopenharmony_ci         res.data.u32 = data;
571bf215546Sopenharmony_ci      }
572bf215546Sopenharmony_ci      break;
573bf215546Sopenharmony_ci   }
574bf215546Sopenharmony_ci   case OP_BMSK:
575bf215546Sopenharmony_ci      res.data.u32 = ((1 << b->data.u32) - 1) << a->data.u32;
576bf215546Sopenharmony_ci      break;
577bf215546Sopenharmony_ci   case OP_MAD:
578bf215546Sopenharmony_ci   case OP_FMA:
579bf215546Sopenharmony_ci   case OP_MUL:
580bf215546Sopenharmony_ci      if (i->dnz && i->dType == TYPE_F32) {
581bf215546Sopenharmony_ci         if (!isfinite(a->data.f32))
582bf215546Sopenharmony_ci            a->data.f32 = 0.0f;
583bf215546Sopenharmony_ci         if (!isfinite(b->data.f32))
584bf215546Sopenharmony_ci            b->data.f32 = 0.0f;
585bf215546Sopenharmony_ci      }
586bf215546Sopenharmony_ci      switch (i->dType) {
587bf215546Sopenharmony_ci      case TYPE_F32:
588bf215546Sopenharmony_ci         res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor);
589bf215546Sopenharmony_ci         break;
590bf215546Sopenharmony_ci      case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break;
591bf215546Sopenharmony_ci      case TYPE_S32:
592bf215546Sopenharmony_ci         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
593bf215546Sopenharmony_ci            res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32;
594bf215546Sopenharmony_ci            break;
595bf215546Sopenharmony_ci         }
596bf215546Sopenharmony_ci         FALLTHROUGH;
597bf215546Sopenharmony_ci      case TYPE_U32:
598bf215546Sopenharmony_ci         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
599bf215546Sopenharmony_ci            res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32;
600bf215546Sopenharmony_ci            break;
601bf215546Sopenharmony_ci         }
602bf215546Sopenharmony_ci         res.data.u32 = a->data.u32 * b->data.u32; break;
603bf215546Sopenharmony_ci      default:
604bf215546Sopenharmony_ci         return;
605bf215546Sopenharmony_ci      }
606bf215546Sopenharmony_ci      break;
607bf215546Sopenharmony_ci   case OP_DIV:
608bf215546Sopenharmony_ci      if (b->data.u32 == 0)
609bf215546Sopenharmony_ci         break;
610bf215546Sopenharmony_ci      switch (i->dType) {
611bf215546Sopenharmony_ci      case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break;
612bf215546Sopenharmony_ci      case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break;
613bf215546Sopenharmony_ci      case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break;
614bf215546Sopenharmony_ci      case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break;
615bf215546Sopenharmony_ci      default:
616bf215546Sopenharmony_ci         return;
617bf215546Sopenharmony_ci      }
618bf215546Sopenharmony_ci      break;
619bf215546Sopenharmony_ci   case OP_ADD:
620bf215546Sopenharmony_ci      switch (i->dType) {
621bf215546Sopenharmony_ci      case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break;
622bf215546Sopenharmony_ci      case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break;
623bf215546Sopenharmony_ci      case TYPE_S32:
624bf215546Sopenharmony_ci      case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break;
625bf215546Sopenharmony_ci      default:
626bf215546Sopenharmony_ci         return;
627bf215546Sopenharmony_ci      }
628bf215546Sopenharmony_ci      break;
629bf215546Sopenharmony_ci   case OP_SUB:
630bf215546Sopenharmony_ci      switch (i->dType) {
631bf215546Sopenharmony_ci      case TYPE_F32: res.data.f32 = a->data.f32 - b->data.f32; break;
632bf215546Sopenharmony_ci      case TYPE_F64: res.data.f64 = a->data.f64 - b->data.f64; break;
633bf215546Sopenharmony_ci      case TYPE_S32:
634bf215546Sopenharmony_ci      case TYPE_U32: res.data.u32 = a->data.u32 - b->data.u32; break;
635bf215546Sopenharmony_ci      default:
636bf215546Sopenharmony_ci         return;
637bf215546Sopenharmony_ci      }
638bf215546Sopenharmony_ci      break;
639bf215546Sopenharmony_ci   case OP_POW:
640bf215546Sopenharmony_ci      switch (i->dType) {
641bf215546Sopenharmony_ci      case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break;
642bf215546Sopenharmony_ci      case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break;
643bf215546Sopenharmony_ci      default:
644bf215546Sopenharmony_ci         return;
645bf215546Sopenharmony_ci      }
646bf215546Sopenharmony_ci      break;
647bf215546Sopenharmony_ci   case OP_MAX:
648bf215546Sopenharmony_ci      switch (i->dType) {
649bf215546Sopenharmony_ci      case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break;
650bf215546Sopenharmony_ci      case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break;
651bf215546Sopenharmony_ci      case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break;
652bf215546Sopenharmony_ci      case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break;
653bf215546Sopenharmony_ci      default:
654bf215546Sopenharmony_ci         return;
655bf215546Sopenharmony_ci      }
656bf215546Sopenharmony_ci      break;
657bf215546Sopenharmony_ci   case OP_MIN:
658bf215546Sopenharmony_ci      switch (i->dType) {
659bf215546Sopenharmony_ci      case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break;
660bf215546Sopenharmony_ci      case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break;
661bf215546Sopenharmony_ci      case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break;
662bf215546Sopenharmony_ci      case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break;
663bf215546Sopenharmony_ci      default:
664bf215546Sopenharmony_ci         return;
665bf215546Sopenharmony_ci      }
666bf215546Sopenharmony_ci      break;
667bf215546Sopenharmony_ci   case OP_AND:
668bf215546Sopenharmony_ci      res.data.u64 = a->data.u64 & b->data.u64;
669bf215546Sopenharmony_ci      break;
670bf215546Sopenharmony_ci   case OP_OR:
671bf215546Sopenharmony_ci      res.data.u64 = a->data.u64 | b->data.u64;
672bf215546Sopenharmony_ci      break;
673bf215546Sopenharmony_ci   case OP_XOR:
674bf215546Sopenharmony_ci      res.data.u64 = a->data.u64 ^ b->data.u64;
675bf215546Sopenharmony_ci      break;
676bf215546Sopenharmony_ci   case OP_SHL:
677bf215546Sopenharmony_ci      res.data.u32 = a->data.u32 << b->data.u32;
678bf215546Sopenharmony_ci      break;
679bf215546Sopenharmony_ci   case OP_SHR:
680bf215546Sopenharmony_ci      switch (i->dType) {
681bf215546Sopenharmony_ci      case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break;
682bf215546Sopenharmony_ci      case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break;
683bf215546Sopenharmony_ci      default:
684bf215546Sopenharmony_ci         return;
685bf215546Sopenharmony_ci      }
686bf215546Sopenharmony_ci      break;
687bf215546Sopenharmony_ci   case OP_SLCT:
688bf215546Sopenharmony_ci      if (a->data.u32 != b->data.u32)
689bf215546Sopenharmony_ci         return;
690bf215546Sopenharmony_ci      res.data.u32 = a->data.u32;
691bf215546Sopenharmony_ci      break;
692bf215546Sopenharmony_ci   case OP_EXTBF: {
693bf215546Sopenharmony_ci      int offset = b->data.u32 & 0xff;
694bf215546Sopenharmony_ci      int width = (b->data.u32 >> 8) & 0xff;
695bf215546Sopenharmony_ci      int rshift = offset;
696bf215546Sopenharmony_ci      int lshift = 0;
697bf215546Sopenharmony_ci      if (width == 0) {
698bf215546Sopenharmony_ci         res.data.u32 = 0;
699bf215546Sopenharmony_ci         break;
700bf215546Sopenharmony_ci      }
701bf215546Sopenharmony_ci      if (width + offset < 32) {
702bf215546Sopenharmony_ci         rshift = 32 - width;
703bf215546Sopenharmony_ci         lshift = 32 - width - offset;
704bf215546Sopenharmony_ci      }
705bf215546Sopenharmony_ci      if (i->subOp == NV50_IR_SUBOP_EXTBF_REV)
706bf215546Sopenharmony_ci         res.data.u32 = util_bitreverse(a->data.u32);
707bf215546Sopenharmony_ci      else
708bf215546Sopenharmony_ci         res.data.u32 = a->data.u32;
709bf215546Sopenharmony_ci      switch (i->dType) {
710bf215546Sopenharmony_ci      case TYPE_S32: res.data.s32 = (res.data.s32 << lshift) >> rshift; break;
711bf215546Sopenharmony_ci      case TYPE_U32: res.data.u32 = (res.data.u32 << lshift) >> rshift; break;
712bf215546Sopenharmony_ci      default:
713bf215546Sopenharmony_ci         return;
714bf215546Sopenharmony_ci      }
715bf215546Sopenharmony_ci      break;
716bf215546Sopenharmony_ci   }
717bf215546Sopenharmony_ci   case OP_POPCNT:
718bf215546Sopenharmony_ci      res.data.u32 = util_bitcount(a->data.u32 & b->data.u32);
719bf215546Sopenharmony_ci      break;
720bf215546Sopenharmony_ci   case OP_PFETCH:
721bf215546Sopenharmony_ci      // The two arguments to pfetch are logically added together. Normally
722bf215546Sopenharmony_ci      // the second argument will not be constant, but that can happen.
723bf215546Sopenharmony_ci      res.data.u32 = a->data.u32 + b->data.u32;
724bf215546Sopenharmony_ci      type = TYPE_U32;
725bf215546Sopenharmony_ci      break;
726bf215546Sopenharmony_ci   case OP_MERGE:
727bf215546Sopenharmony_ci      switch (i->dType) {
728bf215546Sopenharmony_ci      case TYPE_U64:
729bf215546Sopenharmony_ci      case TYPE_S64:
730bf215546Sopenharmony_ci      case TYPE_F64:
731bf215546Sopenharmony_ci         res.data.u64 = (((uint64_t)b->data.u32) << 32) | a->data.u32;
732bf215546Sopenharmony_ci         break;
733bf215546Sopenharmony_ci      default:
734bf215546Sopenharmony_ci         return;
735bf215546Sopenharmony_ci      }
736bf215546Sopenharmony_ci      break;
737bf215546Sopenharmony_ci   default:
738bf215546Sopenharmony_ci      return;
739bf215546Sopenharmony_ci   }
740bf215546Sopenharmony_ci   ++foldCount;
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ci   i->src(0).mod = Modifier(0);
743bf215546Sopenharmony_ci   i->src(1).mod = Modifier(0);
744bf215546Sopenharmony_ci   i->postFactor = 0;
745bf215546Sopenharmony_ci
746bf215546Sopenharmony_ci   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
747bf215546Sopenharmony_ci   i->setSrc(1, NULL);
748bf215546Sopenharmony_ci
749bf215546Sopenharmony_ci   i->getSrc(0)->reg.data = res.data;
750bf215546Sopenharmony_ci   i->getSrc(0)->reg.type = type;
751bf215546Sopenharmony_ci   i->getSrc(0)->reg.size = typeSizeof(type);
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_ci   switch (i->op) {
754bf215546Sopenharmony_ci   case OP_MAD:
755bf215546Sopenharmony_ci   case OP_FMA: {
756bf215546Sopenharmony_ci      ImmediateValue src0, src1 = *i->getSrc(0)->asImm();
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci      // Move the immediate into position 1, where we know it might be
759bf215546Sopenharmony_ci      // emittable. However it might not be anyways, as there may be other
760bf215546Sopenharmony_ci      // restrictions, so move it into a separate LValue.
761bf215546Sopenharmony_ci      bld.setPosition(i, false);
762bf215546Sopenharmony_ci      i->op = OP_ADD;
763bf215546Sopenharmony_ci      i->dnz = 0;
764bf215546Sopenharmony_ci      i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(0), type)->getDef(0));
765bf215546Sopenharmony_ci      i->setSrc(0, i->getSrc(2));
766bf215546Sopenharmony_ci      i->src(0).mod = i->src(2).mod;
767bf215546Sopenharmony_ci      i->setSrc(2, NULL);
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci      if (i->src(0).getImmediate(src0))
770bf215546Sopenharmony_ci         expr(i, src0, src1);
771bf215546Sopenharmony_ci      else
772bf215546Sopenharmony_ci         opnd(i, src1, 1);
773bf215546Sopenharmony_ci      break;
774bf215546Sopenharmony_ci   }
775bf215546Sopenharmony_ci   case OP_PFETCH:
776bf215546Sopenharmony_ci      // Leave PFETCH alone... we just folded its 2 args into 1.
777bf215546Sopenharmony_ci      break;
778bf215546Sopenharmony_ci   default:
779bf215546Sopenharmony_ci      i->op = i->saturate ? OP_SAT : OP_MOV;
780bf215546Sopenharmony_ci      if (i->saturate)
781bf215546Sopenharmony_ci         unary(i, *i->getSrc(0)->asImm());
782bf215546Sopenharmony_ci      break;
783bf215546Sopenharmony_ci   }
784bf215546Sopenharmony_ci   i->subOp = 0;
785bf215546Sopenharmony_ci}
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_civoid
788bf215546Sopenharmony_ciConstantFolding::expr(Instruction *i,
789bf215546Sopenharmony_ci                      ImmediateValue &imm0,
790bf215546Sopenharmony_ci                      ImmediateValue &imm1,
791bf215546Sopenharmony_ci                      ImmediateValue &imm2)
792bf215546Sopenharmony_ci{
793bf215546Sopenharmony_ci   struct Storage *const a = &imm0.reg, *const b = &imm1.reg, *const c = &imm2.reg;
794bf215546Sopenharmony_ci   struct Storage res;
795bf215546Sopenharmony_ci
796bf215546Sopenharmony_ci   memset(&res.data, 0, sizeof(res.data));
797bf215546Sopenharmony_ci
798bf215546Sopenharmony_ci   switch (i->op) {
799bf215546Sopenharmony_ci   case OP_LOP3_LUT:
800bf215546Sopenharmony_ci      for (int n = 0; n < 32; n++) {
801bf215546Sopenharmony_ci         uint8_t lut = ((a->data.u32 >> n) & 1) << 2 |
802bf215546Sopenharmony_ci                       ((b->data.u32 >> n) & 1) << 1 |
803bf215546Sopenharmony_ci                       ((c->data.u32 >> n) & 1);
804bf215546Sopenharmony_ci         res.data.u32 |= !!(i->subOp & (1 << lut)) << n;
805bf215546Sopenharmony_ci      }
806bf215546Sopenharmony_ci      break;
807bf215546Sopenharmony_ci   case OP_PERMT:
808bf215546Sopenharmony_ci      if (!i->subOp) {
809bf215546Sopenharmony_ci         uint64_t input = (uint64_t)c->data.u32 << 32 | a->data.u32;
810bf215546Sopenharmony_ci         uint16_t permt = b->data.u32;
811bf215546Sopenharmony_ci         for (int n = 0 ; n < 4; n++, permt >>= 4)
812bf215546Sopenharmony_ci            res.data.u32 |= ((input >> ((permt & 0xf) * 8)) & 0xff) << n * 8;
813bf215546Sopenharmony_ci      } else
814bf215546Sopenharmony_ci         return;
815bf215546Sopenharmony_ci      break;
816bf215546Sopenharmony_ci   case OP_INSBF: {
817bf215546Sopenharmony_ci      int offset = b->data.u32 & 0xff;
818bf215546Sopenharmony_ci      int width = (b->data.u32 >> 8) & 0xff;
819bf215546Sopenharmony_ci      unsigned bitmask = ((1 << width) - 1) << offset;
820bf215546Sopenharmony_ci      res.data.u32 = ((a->data.u32 << offset) & bitmask) | (c->data.u32 & ~bitmask);
821bf215546Sopenharmony_ci      break;
822bf215546Sopenharmony_ci   }
823bf215546Sopenharmony_ci   case OP_MAD:
824bf215546Sopenharmony_ci   case OP_FMA: {
825bf215546Sopenharmony_ci      switch (i->dType) {
826bf215546Sopenharmony_ci      case TYPE_F32:
827bf215546Sopenharmony_ci         res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor) +
828bf215546Sopenharmony_ci            c->data.f32;
829bf215546Sopenharmony_ci         break;
830bf215546Sopenharmony_ci      case TYPE_F64:
831bf215546Sopenharmony_ci         res.data.f64 = a->data.f64 * b->data.f64 + c->data.f64;
832bf215546Sopenharmony_ci         break;
833bf215546Sopenharmony_ci      case TYPE_S32:
834bf215546Sopenharmony_ci         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
835bf215546Sopenharmony_ci            res.data.s32 = ((int64_t)a->data.s32 * b->data.s32 >> 32) + c->data.s32;
836bf215546Sopenharmony_ci            break;
837bf215546Sopenharmony_ci         }
838bf215546Sopenharmony_ci         FALLTHROUGH;
839bf215546Sopenharmony_ci      case TYPE_U32:
840bf215546Sopenharmony_ci         if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
841bf215546Sopenharmony_ci            res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32 >> 32) + c->data.u32;
842bf215546Sopenharmony_ci            break;
843bf215546Sopenharmony_ci         }
844bf215546Sopenharmony_ci         res.data.u32 = a->data.u32 * b->data.u32 + c->data.u32;
845bf215546Sopenharmony_ci         break;
846bf215546Sopenharmony_ci      default:
847bf215546Sopenharmony_ci         return;
848bf215546Sopenharmony_ci      }
849bf215546Sopenharmony_ci      break;
850bf215546Sopenharmony_ci   }
851bf215546Sopenharmony_ci   case OP_SHLADD:
852bf215546Sopenharmony_ci      res.data.u32 = (a->data.u32 << b->data.u32) + c->data.u32;
853bf215546Sopenharmony_ci      break;
854bf215546Sopenharmony_ci   default:
855bf215546Sopenharmony_ci      return;
856bf215546Sopenharmony_ci   }
857bf215546Sopenharmony_ci
858bf215546Sopenharmony_ci   ++foldCount;
859bf215546Sopenharmony_ci   i->src(0).mod = Modifier(0);
860bf215546Sopenharmony_ci   i->src(1).mod = Modifier(0);
861bf215546Sopenharmony_ci   i->src(2).mod = Modifier(0);
862bf215546Sopenharmony_ci
863bf215546Sopenharmony_ci   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32));
864bf215546Sopenharmony_ci   i->setSrc(1, NULL);
865bf215546Sopenharmony_ci   i->setSrc(2, NULL);
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_ci   i->getSrc(0)->reg.data = res.data;
868bf215546Sopenharmony_ci   i->getSrc(0)->reg.type = i->dType;
869bf215546Sopenharmony_ci   i->getSrc(0)->reg.size = typeSizeof(i->dType);
870bf215546Sopenharmony_ci
871bf215546Sopenharmony_ci   i->op = OP_MOV;
872bf215546Sopenharmony_ci}
873bf215546Sopenharmony_ci
874bf215546Sopenharmony_civoid
875bf215546Sopenharmony_ciConstantFolding::unary(Instruction *i, const ImmediateValue &imm)
876bf215546Sopenharmony_ci{
877bf215546Sopenharmony_ci   Storage res;
878bf215546Sopenharmony_ci
879bf215546Sopenharmony_ci   if (i->dType != TYPE_F32)
880bf215546Sopenharmony_ci      return;
881bf215546Sopenharmony_ci   switch (i->op) {
882bf215546Sopenharmony_ci   case OP_NEG: res.data.f32 = -imm.reg.data.f32; break;
883bf215546Sopenharmony_ci   case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break;
884bf215546Sopenharmony_ci   case OP_SAT: res.data.f32 = SATURATE(imm.reg.data.f32); break;
885bf215546Sopenharmony_ci   case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break;
886bf215546Sopenharmony_ci   case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break;
887bf215546Sopenharmony_ci   case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break;
888bf215546Sopenharmony_ci   case OP_EX2: res.data.f32 = exp2f(imm.reg.data.f32); break;
889bf215546Sopenharmony_ci   case OP_SIN: res.data.f32 = sinf(imm.reg.data.f32); break;
890bf215546Sopenharmony_ci   case OP_COS: res.data.f32 = cosf(imm.reg.data.f32); break;
891bf215546Sopenharmony_ci   case OP_SQRT: res.data.f32 = sqrtf(imm.reg.data.f32); break;
892bf215546Sopenharmony_ci   case OP_PRESIN:
893bf215546Sopenharmony_ci   case OP_PREEX2:
894bf215546Sopenharmony_ci      // these should be handled in subsequent OP_SIN/COS/EX2
895bf215546Sopenharmony_ci      res.data.f32 = imm.reg.data.f32;
896bf215546Sopenharmony_ci      break;
897bf215546Sopenharmony_ci   default:
898bf215546Sopenharmony_ci      return;
899bf215546Sopenharmony_ci   }
900bf215546Sopenharmony_ci   i->op = OP_MOV;
901bf215546Sopenharmony_ci   i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.f32));
902bf215546Sopenharmony_ci   i->src(0).mod = Modifier(0);
903bf215546Sopenharmony_ci}
904bf215546Sopenharmony_ci
905bf215546Sopenharmony_civoid
906bf215546Sopenharmony_ciConstantFolding::tryCollapseChainedMULs(Instruction *mul2,
907bf215546Sopenharmony_ci                                        const int s, ImmediateValue& imm2)
908bf215546Sopenharmony_ci{
909bf215546Sopenharmony_ci   const int t = s ? 0 : 1;
910bf215546Sopenharmony_ci   Instruction *insn;
911bf215546Sopenharmony_ci   Instruction *mul1 = NULL; // mul1 before mul2
912bf215546Sopenharmony_ci   int e = 0;
913bf215546Sopenharmony_ci   float f = imm2.reg.data.f32 * exp2f(mul2->postFactor);
914bf215546Sopenharmony_ci   ImmediateValue imm1;
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci   assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32);
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci   if (mul2->getSrc(t)->refCount() == 1) {
919bf215546Sopenharmony_ci      insn = mul2->getSrc(t)->getInsn();
920bf215546Sopenharmony_ci      if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32)
921bf215546Sopenharmony_ci         mul1 = insn;
922bf215546Sopenharmony_ci      if (mul1 && !mul1->saturate) {
923bf215546Sopenharmony_ci         int s1;
924bf215546Sopenharmony_ci
925bf215546Sopenharmony_ci         if (mul1->src(s1 = 0).getImmediate(imm1) ||
926bf215546Sopenharmony_ci             mul1->src(s1 = 1).getImmediate(imm1)) {
927bf215546Sopenharmony_ci            bld.setPosition(mul1, false);
928bf215546Sopenharmony_ci            // a = mul r, imm1
929bf215546Sopenharmony_ci            // d = mul a, imm2 -> d = mul r, (imm1 * imm2)
930bf215546Sopenharmony_ci            mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32));
931bf215546Sopenharmony_ci            mul1->src(s1).mod = Modifier(0);
932bf215546Sopenharmony_ci            mul2->def(0).replace(mul1->getDef(0), false);
933bf215546Sopenharmony_ci            mul1->saturate = mul2->saturate;
934bf215546Sopenharmony_ci         } else
935bf215546Sopenharmony_ci         if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
936bf215546Sopenharmony_ci            // c = mul a, b
937bf215546Sopenharmony_ci            // d = mul c, imm   -> d = mul_x_imm a, b
938bf215546Sopenharmony_ci            mul1->postFactor = e;
939bf215546Sopenharmony_ci            mul2->def(0).replace(mul1->getDef(0), false);
940bf215546Sopenharmony_ci            if (f < 0)
941bf215546Sopenharmony_ci               mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG);
942bf215546Sopenharmony_ci            mul1->saturate = mul2->saturate;
943bf215546Sopenharmony_ci         }
944bf215546Sopenharmony_ci         return;
945bf215546Sopenharmony_ci      }
946bf215546Sopenharmony_ci   }
947bf215546Sopenharmony_ci   if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) {
948bf215546Sopenharmony_ci      // b = mul a, imm
949bf215546Sopenharmony_ci      // d = mul b, c   -> d = mul_x_imm a, c
950bf215546Sopenharmony_ci      int s2, t2;
951bf215546Sopenharmony_ci      insn = (*mul2->getDef(0)->uses.begin())->getInsn();
952bf215546Sopenharmony_ci      if (!insn)
953bf215546Sopenharmony_ci         return;
954bf215546Sopenharmony_ci      mul1 = mul2;
955bf215546Sopenharmony_ci      mul2 = NULL;
956bf215546Sopenharmony_ci      s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1;
957bf215546Sopenharmony_ci      t2 = s2 ? 0 : 1;
958bf215546Sopenharmony_ci      if (insn->op == OP_MUL && insn->dType == TYPE_F32)
959bf215546Sopenharmony_ci         if (!insn->src(s2).mod && !insn->src(t2).getImmediate(imm1))
960bf215546Sopenharmony_ci            mul2 = insn;
961bf215546Sopenharmony_ci      if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) {
962bf215546Sopenharmony_ci         mul2->postFactor = e;
963bf215546Sopenharmony_ci         mul2->setSrc(s2, mul1->src(t));
964bf215546Sopenharmony_ci         if (f < 0)
965bf215546Sopenharmony_ci            mul2->src(s2).mod *= Modifier(NV50_IR_MOD_NEG);
966bf215546Sopenharmony_ci      }
967bf215546Sopenharmony_ci   }
968bf215546Sopenharmony_ci}
969bf215546Sopenharmony_ci
970bf215546Sopenharmony_civoid
971bf215546Sopenharmony_ciConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2)
972bf215546Sopenharmony_ci{
973bf215546Sopenharmony_ci   switch (i->op) {
974bf215546Sopenharmony_ci   case OP_MAD:
975bf215546Sopenharmony_ci   case OP_FMA:
976bf215546Sopenharmony_ci      if (imm2.isInteger(0)) {
977bf215546Sopenharmony_ci         i->op = OP_MUL;
978bf215546Sopenharmony_ci         i->setSrc(2, NULL);
979bf215546Sopenharmony_ci         foldCount++;
980bf215546Sopenharmony_ci         return;
981bf215546Sopenharmony_ci      }
982bf215546Sopenharmony_ci      break;
983bf215546Sopenharmony_ci   case OP_SHLADD:
984bf215546Sopenharmony_ci      if (imm2.isInteger(0)) {
985bf215546Sopenharmony_ci         i->op = OP_SHL;
986bf215546Sopenharmony_ci         i->setSrc(2, NULL);
987bf215546Sopenharmony_ci         foldCount++;
988bf215546Sopenharmony_ci         return;
989bf215546Sopenharmony_ci      }
990bf215546Sopenharmony_ci      break;
991bf215546Sopenharmony_ci   default:
992bf215546Sopenharmony_ci      return;
993bf215546Sopenharmony_ci   }
994bf215546Sopenharmony_ci}
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_cibool
997bf215546Sopenharmony_ciConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *c)
998bf215546Sopenharmony_ci{
999bf215546Sopenharmony_ci   const Target *target = prog->getTarget();
1000bf215546Sopenharmony_ci   int64_t absB = llabs(b);
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_ci   //a * (2^shl) -> a << shl
1003bf215546Sopenharmony_ci   if (b >= 0 && util_is_power_of_two_or_zero64(b)) {
1004bf215546Sopenharmony_ci      int shl = util_logbase2_64(b);
1005bf215546Sopenharmony_ci
1006bf215546Sopenharmony_ci      Value *res = c ? bld.getSSA(typeSizeof(ty)) : def;
1007bf215546Sopenharmony_ci      bld.mkOp2(OP_SHL, ty, res, a, bld.mkImm(shl));
1008bf215546Sopenharmony_ci      if (c)
1009bf215546Sopenharmony_ci         bld.mkOp2(OP_ADD, ty, def, res, c);
1010bf215546Sopenharmony_ci
1011bf215546Sopenharmony_ci      return true;
1012bf215546Sopenharmony_ci   }
1013bf215546Sopenharmony_ci
1014bf215546Sopenharmony_ci   //a * (2^shl + 1) -> a << shl + a
1015bf215546Sopenharmony_ci   //a * -(2^shl + 1) -> -a << shl + a
1016bf215546Sopenharmony_ci   //a * (2^shl - 1) -> a << shl - a
1017bf215546Sopenharmony_ci   //a * -(2^shl - 1) -> -a << shl - a
1018bf215546Sopenharmony_ci   if (typeSizeof(ty) == 4 &&
1019bf215546Sopenharmony_ci       (util_is_power_of_two_or_zero64(absB - 1) ||
1020bf215546Sopenharmony_ci        util_is_power_of_two_or_zero64(absB + 1)) &&
1021bf215546Sopenharmony_ci       target->isOpSupported(OP_SHLADD, TYPE_U32)) {
1022bf215546Sopenharmony_ci      bool subA = util_is_power_of_two_or_zero64(absB + 1);
1023bf215546Sopenharmony_ci      int shl = subA ? util_logbase2_64(absB + 1) : util_logbase2_64(absB - 1);
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci      Value *res = c ? bld.getSSA() : def;
1026bf215546Sopenharmony_ci      Instruction *insn = bld.mkOp3(OP_SHLADD, TYPE_U32, res, a, bld.mkImm(shl), a);
1027bf215546Sopenharmony_ci      if (b < 0)
1028bf215546Sopenharmony_ci         insn->src(0).mod = Modifier(NV50_IR_MOD_NEG);
1029bf215546Sopenharmony_ci      if (subA)
1030bf215546Sopenharmony_ci         insn->src(2).mod = Modifier(NV50_IR_MOD_NEG);
1031bf215546Sopenharmony_ci
1032bf215546Sopenharmony_ci      if (c)
1033bf215546Sopenharmony_ci         bld.mkOp2(OP_ADD, TYPE_U32, def, res, c);
1034bf215546Sopenharmony_ci
1035bf215546Sopenharmony_ci      return true;
1036bf215546Sopenharmony_ci   }
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_ci   if (typeSizeof(ty) == 4 && b >= 0 && b <= 0xffff &&
1039bf215546Sopenharmony_ci       target->isOpSupported(OP_XMAD, TYPE_U32)) {
1040bf215546Sopenharmony_ci      Value *tmp = bld.mkOp3v(OP_XMAD, TYPE_U32, bld.getSSA(),
1041bf215546Sopenharmony_ci                              a, bld.mkImm((uint32_t)b), c ? c : bld.mkImm(0));
1042bf215546Sopenharmony_ci      bld.mkOp3(OP_XMAD, TYPE_U32, def, a, bld.mkImm((uint32_t)b), tmp)->subOp =
1043bf215546Sopenharmony_ci         NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_H1(0);
1044bf215546Sopenharmony_ci
1045bf215546Sopenharmony_ci      return true;
1046bf215546Sopenharmony_ci   }
1047bf215546Sopenharmony_ci
1048bf215546Sopenharmony_ci   return false;
1049bf215546Sopenharmony_ci}
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_cibool
1052bf215546Sopenharmony_ciConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
1053bf215546Sopenharmony_ci{
1054bf215546Sopenharmony_ci   const int t = !s;
1055bf215546Sopenharmony_ci   const operation op = i->op;
1056bf215546Sopenharmony_ci   Instruction *newi = i;
1057bf215546Sopenharmony_ci   bool deleted = false;
1058bf215546Sopenharmony_ci
1059bf215546Sopenharmony_ci   switch (i->op) {
1060bf215546Sopenharmony_ci   case OP_SPLIT: {
1061bf215546Sopenharmony_ci      bld.setPosition(i, false);
1062bf215546Sopenharmony_ci
1063bf215546Sopenharmony_ci      uint8_t size = i->getDef(0)->reg.size;
1064bf215546Sopenharmony_ci      uint8_t bitsize = size * 8;
1065bf215546Sopenharmony_ci      uint32_t mask = (1ULL << bitsize) - 1;
1066bf215546Sopenharmony_ci      assert(bitsize <= 32);
1067bf215546Sopenharmony_ci
1068bf215546Sopenharmony_ci      uint64_t val = imm0.reg.data.u64;
1069bf215546Sopenharmony_ci      for (int8_t d = 0; i->defExists(d); ++d) {
1070bf215546Sopenharmony_ci         Value *def = i->getDef(d);
1071bf215546Sopenharmony_ci         assert(def->reg.size == size);
1072bf215546Sopenharmony_ci
1073bf215546Sopenharmony_ci         newi = bld.mkMov(def, bld.mkImm((uint32_t)(val & mask)), TYPE_U32);
1074bf215546Sopenharmony_ci         val >>= bitsize;
1075bf215546Sopenharmony_ci      }
1076bf215546Sopenharmony_ci      delete_Instruction(prog, i);
1077bf215546Sopenharmony_ci      deleted = true;
1078bf215546Sopenharmony_ci      break;
1079bf215546Sopenharmony_ci   }
1080bf215546Sopenharmony_ci   case OP_MUL:
1081bf215546Sopenharmony_ci      if (i->dType == TYPE_F32 && !i->precise)
1082bf215546Sopenharmony_ci         tryCollapseChainedMULs(i, s, imm0);
1083bf215546Sopenharmony_ci
1084bf215546Sopenharmony_ci      if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) {
1085bf215546Sopenharmony_ci         assert(!isFloatType(i->sType));
1086bf215546Sopenharmony_ci         if (imm0.isInteger(1) && i->dType == TYPE_S32) {
1087bf215546Sopenharmony_ci            bld.setPosition(i, false);
1088bf215546Sopenharmony_ci            // Need to set to the sign value, which is a compare.
1089bf215546Sopenharmony_ci            newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0),
1090bf215546Sopenharmony_ci                             TYPE_S32, i->getSrc(t), bld.mkImm(0));
1091bf215546Sopenharmony_ci            delete_Instruction(prog, i);
1092bf215546Sopenharmony_ci            deleted = true;
1093bf215546Sopenharmony_ci         } else if (imm0.isInteger(0) || imm0.isInteger(1)) {
1094bf215546Sopenharmony_ci            // The high bits can't be set in this case (either mul by 0 or
1095bf215546Sopenharmony_ci            // unsigned by 1)
1096bf215546Sopenharmony_ci            i->op = OP_MOV;
1097bf215546Sopenharmony_ci            i->subOp = 0;
1098bf215546Sopenharmony_ci            i->setSrc(0, new_ImmediateValue(prog, 0u));
1099bf215546Sopenharmony_ci            i->src(0).mod = Modifier(0);
1100bf215546Sopenharmony_ci            i->setSrc(1, NULL);
1101bf215546Sopenharmony_ci         } else if (!imm0.isNegative() && imm0.isPow2()) {
1102bf215546Sopenharmony_ci            // Translate into a shift
1103bf215546Sopenharmony_ci            imm0.applyLog2();
1104bf215546Sopenharmony_ci            i->op = OP_SHR;
1105bf215546Sopenharmony_ci            i->subOp = 0;
1106bf215546Sopenharmony_ci            imm0.reg.data.u32 = 32 - imm0.reg.data.u32;
1107bf215546Sopenharmony_ci            i->setSrc(0, i->getSrc(t));
1108bf215546Sopenharmony_ci            i->src(0).mod = i->src(t).mod;
1109bf215546Sopenharmony_ci            i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32));
1110bf215546Sopenharmony_ci            i->src(1).mod = 0;
1111bf215546Sopenharmony_ci         }
1112bf215546Sopenharmony_ci      } else
1113bf215546Sopenharmony_ci      if (imm0.isInteger(0)) {
1114bf215546Sopenharmony_ci         i->dnz = 0;
1115bf215546Sopenharmony_ci         i->op = OP_MOV;
1116bf215546Sopenharmony_ci         i->setSrc(0, new_ImmediateValue(prog, 0u));
1117bf215546Sopenharmony_ci         i->src(0).mod = Modifier(0);
1118bf215546Sopenharmony_ci         i->postFactor = 0;
1119bf215546Sopenharmony_ci         i->setSrc(1, NULL);
1120bf215546Sopenharmony_ci      } else
1121bf215546Sopenharmony_ci      if (!i->postFactor && (imm0.isInteger(1) || imm0.isInteger(-1))) {
1122bf215546Sopenharmony_ci         if (imm0.isNegative())
1123bf215546Sopenharmony_ci            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
1124bf215546Sopenharmony_ci         i->dnz = 0;
1125bf215546Sopenharmony_ci         i->op = i->src(t).mod.getOp();
1126bf215546Sopenharmony_ci         if (s == 0) {
1127bf215546Sopenharmony_ci            i->setSrc(0, i->getSrc(1));
1128bf215546Sopenharmony_ci            i->src(0).mod = i->src(1).mod;
1129bf215546Sopenharmony_ci            i->src(1).mod = 0;
1130bf215546Sopenharmony_ci         }
1131bf215546Sopenharmony_ci         if (i->op != OP_CVT)
1132bf215546Sopenharmony_ci            i->src(0).mod = 0;
1133bf215546Sopenharmony_ci         i->setSrc(1, NULL);
1134bf215546Sopenharmony_ci      } else
1135bf215546Sopenharmony_ci      if (!i->postFactor && (imm0.isInteger(2) || imm0.isInteger(-2))) {
1136bf215546Sopenharmony_ci         if (imm0.isNegative())
1137bf215546Sopenharmony_ci            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
1138bf215546Sopenharmony_ci         i->op = OP_ADD;
1139bf215546Sopenharmony_ci         i->dnz = 0;
1140bf215546Sopenharmony_ci         i->setSrc(s, i->getSrc(t));
1141bf215546Sopenharmony_ci         i->src(s).mod = i->src(t).mod;
1142bf215546Sopenharmony_ci      } else
1143bf215546Sopenharmony_ci      if (!isFloatType(i->dType) && !i->src(t).mod) {
1144bf215546Sopenharmony_ci         bld.setPosition(i, false);
1145bf215546Sopenharmony_ci         int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
1146bf215546Sopenharmony_ci         if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) {
1147bf215546Sopenharmony_ci            delete_Instruction(prog, i);
1148bf215546Sopenharmony_ci            deleted = true;
1149bf215546Sopenharmony_ci         }
1150bf215546Sopenharmony_ci      } else
1151bf215546Sopenharmony_ci      if (i->postFactor && i->sType == TYPE_F32) {
1152bf215546Sopenharmony_ci         /* Can't emit a postfactor with an immediate, have to fold it in */
1153bf215546Sopenharmony_ci         i->setSrc(s, new_ImmediateValue(
1154bf215546Sopenharmony_ci                      prog, imm0.reg.data.f32 * exp2f(i->postFactor)));
1155bf215546Sopenharmony_ci         i->postFactor = 0;
1156bf215546Sopenharmony_ci      }
1157bf215546Sopenharmony_ci      break;
1158bf215546Sopenharmony_ci   case OP_FMA:
1159bf215546Sopenharmony_ci   case OP_MAD:
1160bf215546Sopenharmony_ci      if (imm0.isInteger(0)) {
1161bf215546Sopenharmony_ci         i->setSrc(0, i->getSrc(2));
1162bf215546Sopenharmony_ci         i->src(0).mod = i->src(2).mod;
1163bf215546Sopenharmony_ci         i->setSrc(1, NULL);
1164bf215546Sopenharmony_ci         i->setSrc(2, NULL);
1165bf215546Sopenharmony_ci         i->dnz = 0;
1166bf215546Sopenharmony_ci         i->op = i->src(0).mod.getOp();
1167bf215546Sopenharmony_ci         if (i->op != OP_CVT)
1168bf215546Sopenharmony_ci            i->src(0).mod = 0;
1169bf215546Sopenharmony_ci      } else
1170bf215546Sopenharmony_ci      if (i->subOp != NV50_IR_SUBOP_MUL_HIGH &&
1171bf215546Sopenharmony_ci          (imm0.isInteger(1) || imm0.isInteger(-1))) {
1172bf215546Sopenharmony_ci         if (imm0.isNegative())
1173bf215546Sopenharmony_ci            i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG);
1174bf215546Sopenharmony_ci         if (s == 0) {
1175bf215546Sopenharmony_ci            i->setSrc(0, i->getSrc(1));
1176bf215546Sopenharmony_ci            i->src(0).mod = i->src(1).mod;
1177bf215546Sopenharmony_ci         }
1178bf215546Sopenharmony_ci         i->setSrc(1, i->getSrc(2));
1179bf215546Sopenharmony_ci         i->src(1).mod = i->src(2).mod;
1180bf215546Sopenharmony_ci         i->setSrc(2, NULL);
1181bf215546Sopenharmony_ci         i->dnz = 0;
1182bf215546Sopenharmony_ci         i->op = OP_ADD;
1183bf215546Sopenharmony_ci      } else
1184bf215546Sopenharmony_ci      if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) {
1185bf215546Sopenharmony_ci         bld.setPosition(i, false);
1186bf215546Sopenharmony_ci         int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32;
1187bf215546Sopenharmony_ci         if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) {
1188bf215546Sopenharmony_ci            delete_Instruction(prog, i);
1189bf215546Sopenharmony_ci            deleted = true;
1190bf215546Sopenharmony_ci         }
1191bf215546Sopenharmony_ci      }
1192bf215546Sopenharmony_ci      break;
1193bf215546Sopenharmony_ci   case OP_SUB:
1194bf215546Sopenharmony_ci      if (imm0.isInteger(0) && s == 0 && typeSizeof(i->dType) == 8 &&
1195bf215546Sopenharmony_ci          !isFloatType(i->dType))
1196bf215546Sopenharmony_ci         break;
1197bf215546Sopenharmony_ci      FALLTHROUGH;
1198bf215546Sopenharmony_ci   case OP_ADD:
1199bf215546Sopenharmony_ci      if (i->usesFlags())
1200bf215546Sopenharmony_ci         break;
1201bf215546Sopenharmony_ci      if (imm0.isInteger(0)) {
1202bf215546Sopenharmony_ci         if (s == 0) {
1203bf215546Sopenharmony_ci            i->setSrc(0, i->getSrc(1));
1204bf215546Sopenharmony_ci            i->src(0).mod = i->src(1).mod;
1205bf215546Sopenharmony_ci            if (i->op == OP_SUB)
1206bf215546Sopenharmony_ci               i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
1207bf215546Sopenharmony_ci         }
1208bf215546Sopenharmony_ci         i->setSrc(1, NULL);
1209bf215546Sopenharmony_ci         i->op = i->src(0).mod.getOp();
1210bf215546Sopenharmony_ci         if (i->op != OP_CVT)
1211bf215546Sopenharmony_ci            i->src(0).mod = Modifier(0);
1212bf215546Sopenharmony_ci      }
1213bf215546Sopenharmony_ci      break;
1214bf215546Sopenharmony_ci
1215bf215546Sopenharmony_ci   case OP_DIV:
1216bf215546Sopenharmony_ci      if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32))
1217bf215546Sopenharmony_ci         break;
1218bf215546Sopenharmony_ci      bld.setPosition(i, false);
1219bf215546Sopenharmony_ci      if (imm0.reg.data.u32 == 0) {
1220bf215546Sopenharmony_ci         break;
1221bf215546Sopenharmony_ci      } else
1222bf215546Sopenharmony_ci      if (imm0.reg.data.u32 == 1) {
1223bf215546Sopenharmony_ci         i->op = OP_MOV;
1224bf215546Sopenharmony_ci         i->setSrc(1, NULL);
1225bf215546Sopenharmony_ci      } else
1226bf215546Sopenharmony_ci      if (i->dType == TYPE_U32 && imm0.isPow2()) {
1227bf215546Sopenharmony_ci         i->op = OP_SHR;
1228bf215546Sopenharmony_ci         i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32)));
1229bf215546Sopenharmony_ci      } else
1230bf215546Sopenharmony_ci      if (i->dType == TYPE_U32) {
1231bf215546Sopenharmony_ci         Instruction *mul;
1232bf215546Sopenharmony_ci         Value *tA, *tB;
1233bf215546Sopenharmony_ci         const uint32_t d = imm0.reg.data.u32;
1234bf215546Sopenharmony_ci         uint32_t m;
1235bf215546Sopenharmony_ci         int r, s;
1236bf215546Sopenharmony_ci         uint32_t l = util_logbase2(d);
1237bf215546Sopenharmony_ci         if (((uint32_t)1 << l) < d)
1238bf215546Sopenharmony_ci            ++l;
1239bf215546Sopenharmony_ci         m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1;
1240bf215546Sopenharmony_ci         r = l ? 1 : 0;
1241bf215546Sopenharmony_ci         s = l ? (l - 1) : 0;
1242bf215546Sopenharmony_ci
1243bf215546Sopenharmony_ci         tA = bld.getSSA();
1244bf215546Sopenharmony_ci         tB = bld.getSSA();
1245bf215546Sopenharmony_ci         mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0),
1246bf215546Sopenharmony_ci                         bld.loadImm(NULL, m));
1247bf215546Sopenharmony_ci         mul->subOp = NV50_IR_SUBOP_MUL_HIGH;
1248bf215546Sopenharmony_ci         bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA);
1249bf215546Sopenharmony_ci         tA = bld.getSSA();
1250bf215546Sopenharmony_ci         if (r)
1251bf215546Sopenharmony_ci            bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r));
1252bf215546Sopenharmony_ci         else
1253bf215546Sopenharmony_ci            tA = tB;
1254bf215546Sopenharmony_ci         tB = s ? bld.getSSA() : i->getDef(0);
1255bf215546Sopenharmony_ci         newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA);
1256bf215546Sopenharmony_ci         if (s)
1257bf215546Sopenharmony_ci            bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s));
1258bf215546Sopenharmony_ci
1259bf215546Sopenharmony_ci         delete_Instruction(prog, i);
1260bf215546Sopenharmony_ci         deleted = true;
1261bf215546Sopenharmony_ci      } else
1262bf215546Sopenharmony_ci      if (imm0.reg.data.s32 == -1) {
1263bf215546Sopenharmony_ci         i->op = OP_NEG;
1264bf215546Sopenharmony_ci         i->setSrc(1, NULL);
1265bf215546Sopenharmony_ci      } else {
1266bf215546Sopenharmony_ci         LValue *tA, *tB;
1267bf215546Sopenharmony_ci         LValue *tD;
1268bf215546Sopenharmony_ci         const int32_t d = imm0.reg.data.s32;
1269bf215546Sopenharmony_ci         int32_t m;
1270bf215546Sopenharmony_ci         int32_t l = util_logbase2(static_cast<unsigned>(abs(d)));
1271bf215546Sopenharmony_ci         if ((1 << l) < abs(d))
1272bf215546Sopenharmony_ci            ++l;
1273bf215546Sopenharmony_ci         if (!l)
1274bf215546Sopenharmony_ci            l = 1;
1275bf215546Sopenharmony_ci         m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32);
1276bf215546Sopenharmony_ci
1277bf215546Sopenharmony_ci         tA = bld.getSSA();
1278bf215546Sopenharmony_ci         tB = bld.getSSA();
1279bf215546Sopenharmony_ci         bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m),
1280bf215546Sopenharmony_ci                   i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH;
1281bf215546Sopenharmony_ci         if (l > 1)
1282bf215546Sopenharmony_ci            bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1));
1283bf215546Sopenharmony_ci         else
1284bf215546Sopenharmony_ci            tB = tA;
1285bf215546Sopenharmony_ci         tA = bld.getSSA();
1286bf215546Sopenharmony_ci         bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0));
1287bf215546Sopenharmony_ci         tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue();
1288bf215546Sopenharmony_ci         newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA);
1289bf215546Sopenharmony_ci         if (d < 0)
1290bf215546Sopenharmony_ci            bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB);
1291bf215546Sopenharmony_ci
1292bf215546Sopenharmony_ci         delete_Instruction(prog, i);
1293bf215546Sopenharmony_ci         deleted = true;
1294bf215546Sopenharmony_ci      }
1295bf215546Sopenharmony_ci      break;
1296bf215546Sopenharmony_ci
1297bf215546Sopenharmony_ci   case OP_MOD:
1298bf215546Sopenharmony_ci      if (s == 1 && imm0.isPow2()) {
1299bf215546Sopenharmony_ci         bld.setPosition(i, false);
1300bf215546Sopenharmony_ci         if (i->sType == TYPE_U32) {
1301bf215546Sopenharmony_ci            i->op = OP_AND;
1302bf215546Sopenharmony_ci            i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1));
1303bf215546Sopenharmony_ci         } else if (i->sType == TYPE_S32) {
1304bf215546Sopenharmony_ci            // Do it on the absolute value of the input, and then restore the
1305bf215546Sopenharmony_ci            // sign. The only odd case is MIN_INT, but that should work out
1306bf215546Sopenharmony_ci            // as well, since MIN_INT mod any power of 2 is 0.
1307bf215546Sopenharmony_ci            //
1308bf215546Sopenharmony_ci            // Technically we don't have to do any of this since MOD is
1309bf215546Sopenharmony_ci            // undefined with negative arguments in GLSL, but this seems like
1310bf215546Sopenharmony_ci            // the nice thing to do.
1311bf215546Sopenharmony_ci            Value *abs = bld.mkOp1v(OP_ABS, TYPE_S32, bld.getSSA(), i->getSrc(0));
1312bf215546Sopenharmony_ci            Value *neg, *v1, *v2;
1313bf215546Sopenharmony_ci            bld.mkCmp(OP_SET, CC_LT, TYPE_S32,
1314bf215546Sopenharmony_ci                      (neg = bld.getSSA(1, prog->getTarget()->nativeFile(FILE_PREDICATE))),
1315bf215546Sopenharmony_ci                      TYPE_S32, i->getSrc(0), bld.loadImm(NULL, 0));
1316bf215546Sopenharmony_ci            Value *mod = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), abs,
1317bf215546Sopenharmony_ci                                    bld.loadImm(NULL, imm0.reg.data.u32 - 1));
1318bf215546Sopenharmony_ci            bld.mkOp1(OP_NEG, TYPE_S32, (v1 = bld.getSSA()), mod)
1319bf215546Sopenharmony_ci               ->setPredicate(CC_P, neg);
1320bf215546Sopenharmony_ci            bld.mkOp1(OP_MOV, TYPE_S32, (v2 = bld.getSSA()), mod)
1321bf215546Sopenharmony_ci               ->setPredicate(CC_NOT_P, neg);
1322bf215546Sopenharmony_ci            newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2);
1323bf215546Sopenharmony_ci
1324bf215546Sopenharmony_ci            delete_Instruction(prog, i);
1325bf215546Sopenharmony_ci            deleted = true;
1326bf215546Sopenharmony_ci         }
1327bf215546Sopenharmony_ci      } else if (s == 1) {
1328bf215546Sopenharmony_ci         // In this case, we still want the optimized lowering that we get
1329bf215546Sopenharmony_ci         // from having division by an immediate.
1330bf215546Sopenharmony_ci         //
1331bf215546Sopenharmony_ci         // a % b == a - (a/b) * b
1332bf215546Sopenharmony_ci         bld.setPosition(i, false);
1333bf215546Sopenharmony_ci         Value *div = bld.mkOp2v(OP_DIV, i->sType, bld.getSSA(),
1334bf215546Sopenharmony_ci                                 i->getSrc(0), i->getSrc(1));
1335bf215546Sopenharmony_ci         newi = bld.mkOp2(OP_ADD, i->sType, i->getDef(0), i->getSrc(0),
1336bf215546Sopenharmony_ci                          bld.mkOp2v(OP_MUL, i->sType, bld.getSSA(), div, i->getSrc(1)));
1337bf215546Sopenharmony_ci         // TODO: Check that target supports this. In this case, we know that
1338bf215546Sopenharmony_ci         // all backends do.
1339bf215546Sopenharmony_ci         newi->src(1).mod = Modifier(NV50_IR_MOD_NEG);
1340bf215546Sopenharmony_ci
1341bf215546Sopenharmony_ci         delete_Instruction(prog, i);
1342bf215546Sopenharmony_ci         deleted = true;
1343bf215546Sopenharmony_ci      }
1344bf215546Sopenharmony_ci      break;
1345bf215546Sopenharmony_ci
1346bf215546Sopenharmony_ci   case OP_SET: // TODO: SET_AND,OR,XOR
1347bf215546Sopenharmony_ci   {
1348bf215546Sopenharmony_ci      /* This optimizes the case where the output of a set is being compared
1349bf215546Sopenharmony_ci       * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we
1350bf215546Sopenharmony_ci       * can be a lot cleverer in our comparison.
1351bf215546Sopenharmony_ci       */
1352bf215546Sopenharmony_ci      CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t));
1353bf215546Sopenharmony_ci      CondCode cc, ccZ;
1354bf215546Sopenharmony_ci      if (imm0.reg.data.u32 != 0 || !si)
1355bf215546Sopenharmony_ci         return false;
1356bf215546Sopenharmony_ci      cc = si->setCond;
1357bf215546Sopenharmony_ci      ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U);
1358bf215546Sopenharmony_ci      // We do everything assuming var (cmp) 0, reverse the condition if 0 is
1359bf215546Sopenharmony_ci      // first.
1360bf215546Sopenharmony_ci      if (s == 0)
1361bf215546Sopenharmony_ci         ccZ = reverseCondCode(ccZ);
1362bf215546Sopenharmony_ci      // If there is a negative modifier, we need to undo that, by flipping
1363bf215546Sopenharmony_ci      // the comparison to zero.
1364bf215546Sopenharmony_ci      if (i->src(t).mod.neg())
1365bf215546Sopenharmony_ci         ccZ = reverseCondCode(ccZ);
1366bf215546Sopenharmony_ci      // If this is a signed comparison, we expect the input to be a regular
1367bf215546Sopenharmony_ci      // boolean, i.e. 0/-1. However the rest of the logic assumes that true
1368bf215546Sopenharmony_ci      // is positive, so just flip the sign.
1369bf215546Sopenharmony_ci      if (i->sType == TYPE_S32) {
1370bf215546Sopenharmony_ci         assert(!isFloatType(si->dType));
1371bf215546Sopenharmony_ci         ccZ = reverseCondCode(ccZ);
1372bf215546Sopenharmony_ci      }
1373bf215546Sopenharmony_ci      switch (ccZ) {
1374bf215546Sopenharmony_ci      case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true
1375bf215546Sopenharmony_ci      case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true
1376bf215546Sopenharmony_ci      case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool
1377bf215546Sopenharmony_ci      case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool
1378bf215546Sopenharmony_ci      case CC_GT: break; // bool > 0 -- bool
1379bf215546Sopenharmony_ci      case CC_NE: break; // bool != 0 -- bool
1380bf215546Sopenharmony_ci      default:
1381bf215546Sopenharmony_ci         return false;
1382bf215546Sopenharmony_ci      }
1383bf215546Sopenharmony_ci
1384bf215546Sopenharmony_ci      // Update the condition of this SET to be identical to the origin set,
1385bf215546Sopenharmony_ci      // but with the updated condition code. The original SET should get
1386bf215546Sopenharmony_ci      // DCE'd, ideally.
1387bf215546Sopenharmony_ci      i->op = si->op;
1388bf215546Sopenharmony_ci      i->asCmp()->setCond = cc;
1389bf215546Sopenharmony_ci      i->setSrc(0, si->src(0));
1390bf215546Sopenharmony_ci      i->setSrc(1, si->src(1));
1391bf215546Sopenharmony_ci      if (si->srcExists(2))
1392bf215546Sopenharmony_ci         i->setSrc(2, si->src(2));
1393bf215546Sopenharmony_ci      i->sType = si->sType;
1394bf215546Sopenharmony_ci   }
1395bf215546Sopenharmony_ci      break;
1396bf215546Sopenharmony_ci
1397bf215546Sopenharmony_ci   case OP_AND:
1398bf215546Sopenharmony_ci   {
1399bf215546Sopenharmony_ci      Instruction *src = i->getSrc(t)->getInsn();
1400bf215546Sopenharmony_ci      ImmediateValue imm1;
1401bf215546Sopenharmony_ci      if (imm0.reg.data.u32 == 0) {
1402bf215546Sopenharmony_ci         i->op = OP_MOV;
1403bf215546Sopenharmony_ci         i->setSrc(0, new_ImmediateValue(prog, 0u));
1404bf215546Sopenharmony_ci         i->src(0).mod = Modifier(0);
1405bf215546Sopenharmony_ci         i->setSrc(1, NULL);
1406bf215546Sopenharmony_ci      } else if (imm0.reg.data.u32 == ~0U) {
1407bf215546Sopenharmony_ci         i->op = i->src(t).mod.getOp();
1408bf215546Sopenharmony_ci         if (t) {
1409bf215546Sopenharmony_ci            i->setSrc(0, i->getSrc(t));
1410bf215546Sopenharmony_ci            i->src(0).mod = i->src(t).mod;
1411bf215546Sopenharmony_ci         }
1412bf215546Sopenharmony_ci         i->setSrc(1, NULL);
1413bf215546Sopenharmony_ci      } else if (src->asCmp()) {
1414bf215546Sopenharmony_ci         CmpInstruction *cmp = src->asCmp();
1415bf215546Sopenharmony_ci         if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1)
1416bf215546Sopenharmony_ci            return false;
1417bf215546Sopenharmony_ci         if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32))
1418bf215546Sopenharmony_ci            return false;
1419bf215546Sopenharmony_ci         if (imm0.reg.data.f32 != 1.0)
1420bf215546Sopenharmony_ci            return false;
1421bf215546Sopenharmony_ci         if (cmp->dType != TYPE_U32)
1422bf215546Sopenharmony_ci            return false;
1423bf215546Sopenharmony_ci
1424bf215546Sopenharmony_ci         cmp->dType = TYPE_F32;
1425bf215546Sopenharmony_ci         if (i->src(t).mod != Modifier(0)) {
1426bf215546Sopenharmony_ci            assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT));
1427bf215546Sopenharmony_ci            i->src(t).mod = Modifier(0);
1428bf215546Sopenharmony_ci            cmp->setCond = inverseCondCode(cmp->setCond);
1429bf215546Sopenharmony_ci         }
1430bf215546Sopenharmony_ci         i->op = OP_MOV;
1431bf215546Sopenharmony_ci         i->setSrc(s, NULL);
1432bf215546Sopenharmony_ci         if (t) {
1433bf215546Sopenharmony_ci            i->setSrc(0, i->getSrc(t));
1434bf215546Sopenharmony_ci            i->setSrc(t, NULL);
1435bf215546Sopenharmony_ci         }
1436bf215546Sopenharmony_ci      } else if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32) &&
1437bf215546Sopenharmony_ci                 src->op == OP_SHR &&
1438bf215546Sopenharmony_ci                 src->src(1).getImmediate(imm1) &&
1439bf215546Sopenharmony_ci                 i->src(t).mod == Modifier(0) &&
1440bf215546Sopenharmony_ci                 util_is_power_of_two_or_zero(imm0.reg.data.u32 + 1)) {
1441bf215546Sopenharmony_ci         // low byte = offset, high byte = width
1442bf215546Sopenharmony_ci         uint32_t ext = (util_last_bit(imm0.reg.data.u32) << 8) | imm1.reg.data.u32;
1443bf215546Sopenharmony_ci         i->op = OP_EXTBF;
1444bf215546Sopenharmony_ci         i->setSrc(0, src->getSrc(0));
1445bf215546Sopenharmony_ci         i->setSrc(1, new_ImmediateValue(prog, ext));
1446bf215546Sopenharmony_ci      } else if (src->op == OP_SHL &&
1447bf215546Sopenharmony_ci                 src->src(1).getImmediate(imm1) &&
1448bf215546Sopenharmony_ci                 i->src(t).mod == Modifier(0) &&
1449bf215546Sopenharmony_ci                 util_is_power_of_two_or_zero(~imm0.reg.data.u32 + 1) &&
1450bf215546Sopenharmony_ci                 util_last_bit(~imm0.reg.data.u32) <= imm1.reg.data.u32) {
1451bf215546Sopenharmony_ci         i->op = OP_MOV;
1452bf215546Sopenharmony_ci         i->setSrc(s, NULL);
1453bf215546Sopenharmony_ci         if (t) {
1454bf215546Sopenharmony_ci            i->setSrc(0, i->getSrc(t));
1455bf215546Sopenharmony_ci            i->setSrc(t, NULL);
1456bf215546Sopenharmony_ci         }
1457bf215546Sopenharmony_ci      }
1458bf215546Sopenharmony_ci   }
1459bf215546Sopenharmony_ci      break;
1460bf215546Sopenharmony_ci
1461bf215546Sopenharmony_ci   case OP_SHL:
1462bf215546Sopenharmony_ci   {
1463bf215546Sopenharmony_ci      if (s != 1 || i->src(0).mod != Modifier(0))
1464bf215546Sopenharmony_ci         break;
1465bf215546Sopenharmony_ci
1466bf215546Sopenharmony_ci      if (imm0.reg.data.u32 == 0) {
1467bf215546Sopenharmony_ci         i->op = OP_MOV;
1468bf215546Sopenharmony_ci         i->setSrc(1, NULL);
1469bf215546Sopenharmony_ci         break;
1470bf215546Sopenharmony_ci      }
1471bf215546Sopenharmony_ci      // try to concatenate shifts
1472bf215546Sopenharmony_ci      Instruction *si = i->getSrc(0)->getInsn();
1473bf215546Sopenharmony_ci      if (!si)
1474bf215546Sopenharmony_ci         break;
1475bf215546Sopenharmony_ci      ImmediateValue imm1;
1476bf215546Sopenharmony_ci      switch (si->op) {
1477bf215546Sopenharmony_ci      case OP_SHL:
1478bf215546Sopenharmony_ci         if (si->src(1).getImmediate(imm1)) {
1479bf215546Sopenharmony_ci            bld.setPosition(i, false);
1480bf215546Sopenharmony_ci            i->setSrc(0, si->getSrc(0));
1481bf215546Sopenharmony_ci            i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32));
1482bf215546Sopenharmony_ci         }
1483bf215546Sopenharmony_ci         break;
1484bf215546Sopenharmony_ci      case OP_SHR:
1485bf215546Sopenharmony_ci         if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) {
1486bf215546Sopenharmony_ci            bld.setPosition(i, false);
1487bf215546Sopenharmony_ci            i->op = OP_AND;
1488bf215546Sopenharmony_ci            i->setSrc(0, si->getSrc(0));
1489bf215546Sopenharmony_ci            i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1)));
1490bf215546Sopenharmony_ci         }
1491bf215546Sopenharmony_ci         break;
1492bf215546Sopenharmony_ci      case OP_MUL:
1493bf215546Sopenharmony_ci         int muls;
1494bf215546Sopenharmony_ci         if (isFloatType(si->dType))
1495bf215546Sopenharmony_ci            return false;
1496bf215546Sopenharmony_ci         if (si->subOp)
1497bf215546Sopenharmony_ci            return false;
1498bf215546Sopenharmony_ci         if (si->src(1).getImmediate(imm1))
1499bf215546Sopenharmony_ci            muls = 1;
1500bf215546Sopenharmony_ci         else if (si->src(0).getImmediate(imm1))
1501bf215546Sopenharmony_ci            muls = 0;
1502bf215546Sopenharmony_ci         else
1503bf215546Sopenharmony_ci            return false;
1504bf215546Sopenharmony_ci
1505bf215546Sopenharmony_ci         bld.setPosition(i, false);
1506bf215546Sopenharmony_ci         i->op = OP_MUL;
1507bf215546Sopenharmony_ci         i->subOp = 0;
1508bf215546Sopenharmony_ci         i->dType = si->dType;
1509bf215546Sopenharmony_ci         i->sType = si->sType;
1510bf215546Sopenharmony_ci         i->setSrc(0, si->getSrc(!muls));
1511bf215546Sopenharmony_ci         i->setSrc(1, bld.loadImm(NULL, imm1.reg.data.u32 << imm0.reg.data.u32));
1512bf215546Sopenharmony_ci         break;
1513bf215546Sopenharmony_ci      case OP_SUB:
1514bf215546Sopenharmony_ci      case OP_ADD:
1515bf215546Sopenharmony_ci         int adds;
1516bf215546Sopenharmony_ci         if (isFloatType(si->dType))
1517bf215546Sopenharmony_ci            return false;
1518bf215546Sopenharmony_ci         if (si->op != OP_SUB && si->src(0).getImmediate(imm1))
1519bf215546Sopenharmony_ci            adds = 0;
1520bf215546Sopenharmony_ci         else if (si->src(1).getImmediate(imm1))
1521bf215546Sopenharmony_ci            adds = 1;
1522bf215546Sopenharmony_ci         else
1523bf215546Sopenharmony_ci            return false;
1524bf215546Sopenharmony_ci         if (si->src(!adds).mod != Modifier(0))
1525bf215546Sopenharmony_ci            return false;
1526bf215546Sopenharmony_ci         // SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z))
1527bf215546Sopenharmony_ci
1528bf215546Sopenharmony_ci         // This is more operations, but if one of x, y is an immediate, then
1529bf215546Sopenharmony_ci         // we can get a situation where (a) we can use ISCADD, or (b)
1530bf215546Sopenharmony_ci         // propagate the add bit into an indirect load.
1531bf215546Sopenharmony_ci         bld.setPosition(i, false);
1532bf215546Sopenharmony_ci         i->op = si->op;
1533bf215546Sopenharmony_ci         i->setSrc(adds, bld.loadImm(NULL, imm1.reg.data.u32 << imm0.reg.data.u32));
1534bf215546Sopenharmony_ci         i->setSrc(!adds, bld.mkOp2v(OP_SHL, i->dType,
1535bf215546Sopenharmony_ci                                     bld.getSSA(i->def(0).getSize(), i->def(0).getFile()),
1536bf215546Sopenharmony_ci                                     si->getSrc(!adds),
1537bf215546Sopenharmony_ci                                     bld.mkImm(imm0.reg.data.u32)));
1538bf215546Sopenharmony_ci         break;
1539bf215546Sopenharmony_ci      default:
1540bf215546Sopenharmony_ci         return false;
1541bf215546Sopenharmony_ci      }
1542bf215546Sopenharmony_ci   }
1543bf215546Sopenharmony_ci      break;
1544bf215546Sopenharmony_ci
1545bf215546Sopenharmony_ci   case OP_ABS:
1546bf215546Sopenharmony_ci   case OP_NEG:
1547bf215546Sopenharmony_ci   case OP_SAT:
1548bf215546Sopenharmony_ci   case OP_LG2:
1549bf215546Sopenharmony_ci   case OP_RCP:
1550bf215546Sopenharmony_ci   case OP_SQRT:
1551bf215546Sopenharmony_ci   case OP_RSQ:
1552bf215546Sopenharmony_ci   case OP_PRESIN:
1553bf215546Sopenharmony_ci   case OP_SIN:
1554bf215546Sopenharmony_ci   case OP_COS:
1555bf215546Sopenharmony_ci   case OP_PREEX2:
1556bf215546Sopenharmony_ci   case OP_EX2:
1557bf215546Sopenharmony_ci      unary(i, imm0);
1558bf215546Sopenharmony_ci      break;
1559bf215546Sopenharmony_ci   case OP_BFIND: {
1560bf215546Sopenharmony_ci      int32_t res;
1561bf215546Sopenharmony_ci      switch (i->dType) {
1562bf215546Sopenharmony_ci      case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break;
1563bf215546Sopenharmony_ci      case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break;
1564bf215546Sopenharmony_ci      default:
1565bf215546Sopenharmony_ci         return false;
1566bf215546Sopenharmony_ci      }
1567bf215546Sopenharmony_ci      if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0)
1568bf215546Sopenharmony_ci         res = 31 - res;
1569bf215546Sopenharmony_ci      bld.setPosition(i, false); /* make sure bld is init'ed */
1570bf215546Sopenharmony_ci      i->setSrc(0, bld.mkImm(res));
1571bf215546Sopenharmony_ci      i->setSrc(1, NULL);
1572bf215546Sopenharmony_ci      i->op = OP_MOV;
1573bf215546Sopenharmony_ci      i->subOp = 0;
1574bf215546Sopenharmony_ci      break;
1575bf215546Sopenharmony_ci   }
1576bf215546Sopenharmony_ci   case OP_BREV: {
1577bf215546Sopenharmony_ci      uint32_t res = util_bitreverse(imm0.reg.data.u32);
1578bf215546Sopenharmony_ci      i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
1579bf215546Sopenharmony_ci      i->op = OP_MOV;
1580bf215546Sopenharmony_ci      break;
1581bf215546Sopenharmony_ci   }
1582bf215546Sopenharmony_ci   case OP_POPCNT: {
1583bf215546Sopenharmony_ci      // Only deal with 1-arg POPCNT here
1584bf215546Sopenharmony_ci      if (i->srcExists(1))
1585bf215546Sopenharmony_ci         break;
1586bf215546Sopenharmony_ci      uint32_t res = util_bitcount(imm0.reg.data.u32);
1587bf215546Sopenharmony_ci      i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
1588bf215546Sopenharmony_ci      i->setSrc(1, NULL);
1589bf215546Sopenharmony_ci      i->op = OP_MOV;
1590bf215546Sopenharmony_ci      break;
1591bf215546Sopenharmony_ci   }
1592bf215546Sopenharmony_ci   case OP_CVT: {
1593bf215546Sopenharmony_ci      Storage res;
1594bf215546Sopenharmony_ci
1595bf215546Sopenharmony_ci      // TODO: handle 64-bit values properly
1596bf215546Sopenharmony_ci      if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8)
1597bf215546Sopenharmony_ci         return false;
1598bf215546Sopenharmony_ci
1599bf215546Sopenharmony_ci      // TODO: handle single byte/word extractions
1600bf215546Sopenharmony_ci      if (i->subOp)
1601bf215546Sopenharmony_ci         return false;
1602bf215546Sopenharmony_ci
1603bf215546Sopenharmony_ci      bld.setPosition(i, true); /* make sure bld is init'ed */
1604bf215546Sopenharmony_ci
1605bf215546Sopenharmony_ci#define CASE(type, dst, fmin, fmax, imin, imax, umin, umax) \
1606bf215546Sopenharmony_ci   case type: \
1607bf215546Sopenharmony_ci      switch (i->sType) { \
1608bf215546Sopenharmony_ci      case TYPE_F64: \
1609bf215546Sopenharmony_ci         res.data.dst = util_iround(i->saturate ? \
1610bf215546Sopenharmony_ci                                    CLAMP(imm0.reg.data.f64, fmin, fmax) : \
1611bf215546Sopenharmony_ci                                    imm0.reg.data.f64); \
1612bf215546Sopenharmony_ci         break; \
1613bf215546Sopenharmony_ci      case TYPE_F32: \
1614bf215546Sopenharmony_ci         res.data.dst = util_iround(i->saturate ? \
1615bf215546Sopenharmony_ci                                    CLAMP(imm0.reg.data.f32, fmin, fmax) : \
1616bf215546Sopenharmony_ci                                    imm0.reg.data.f32); \
1617bf215546Sopenharmony_ci         break; \
1618bf215546Sopenharmony_ci      case TYPE_S32: \
1619bf215546Sopenharmony_ci         res.data.dst = i->saturate ? \
1620bf215546Sopenharmony_ci                        CLAMP(imm0.reg.data.s32, imin, imax) : \
1621bf215546Sopenharmony_ci                        imm0.reg.data.s32; \
1622bf215546Sopenharmony_ci         break; \
1623bf215546Sopenharmony_ci      case TYPE_U32: \
1624bf215546Sopenharmony_ci         res.data.dst = i->saturate ? \
1625bf215546Sopenharmony_ci                        CLAMP(imm0.reg.data.u32, umin, umax) : \
1626bf215546Sopenharmony_ci                        imm0.reg.data.u32; \
1627bf215546Sopenharmony_ci         break; \
1628bf215546Sopenharmony_ci      case TYPE_S16: \
1629bf215546Sopenharmony_ci         res.data.dst = i->saturate ? \
1630bf215546Sopenharmony_ci                        CLAMP(imm0.reg.data.s16, imin, imax) : \
1631bf215546Sopenharmony_ci                        imm0.reg.data.s16; \
1632bf215546Sopenharmony_ci         break; \
1633bf215546Sopenharmony_ci      case TYPE_U16: \
1634bf215546Sopenharmony_ci         res.data.dst = i->saturate ? \
1635bf215546Sopenharmony_ci                        CLAMP(imm0.reg.data.u16, umin, umax) : \
1636bf215546Sopenharmony_ci                        imm0.reg.data.u16; \
1637bf215546Sopenharmony_ci         break; \
1638bf215546Sopenharmony_ci      default: return false; \
1639bf215546Sopenharmony_ci      } \
1640bf215546Sopenharmony_ci      i->setSrc(0, bld.mkImm(res.data.dst)); \
1641bf215546Sopenharmony_ci      break
1642bf215546Sopenharmony_ci
1643bf215546Sopenharmony_ci      switch(i->dType) {
1644bf215546Sopenharmony_ci      CASE(TYPE_U16, u16, 0, UINT16_MAX, 0, UINT16_MAX, 0, UINT16_MAX);
1645bf215546Sopenharmony_ci      CASE(TYPE_S16, s16, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, 0, INT16_MAX);
1646bf215546Sopenharmony_ci      CASE(TYPE_U32, u32, 0, UINT32_MAX, 0, INT32_MAX, 0, UINT32_MAX);
1647bf215546Sopenharmony_ci      CASE(TYPE_S32, s32, INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX, 0, INT32_MAX);
1648bf215546Sopenharmony_ci      case TYPE_F32:
1649bf215546Sopenharmony_ci         switch (i->sType) {
1650bf215546Sopenharmony_ci         case TYPE_F64:
1651bf215546Sopenharmony_ci            res.data.f32 = i->saturate ?
1652bf215546Sopenharmony_ci               SATURATE(imm0.reg.data.f64) :
1653bf215546Sopenharmony_ci               imm0.reg.data.f64;
1654bf215546Sopenharmony_ci            break;
1655bf215546Sopenharmony_ci         case TYPE_F32:
1656bf215546Sopenharmony_ci            res.data.f32 = i->saturate ?
1657bf215546Sopenharmony_ci               SATURATE(imm0.reg.data.f32) :
1658bf215546Sopenharmony_ci               imm0.reg.data.f32;
1659bf215546Sopenharmony_ci            break;
1660bf215546Sopenharmony_ci         case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break;
1661bf215546Sopenharmony_ci         case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break;
1662bf215546Sopenharmony_ci         case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break;
1663bf215546Sopenharmony_ci         case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break;
1664bf215546Sopenharmony_ci         default:
1665bf215546Sopenharmony_ci            return false;
1666bf215546Sopenharmony_ci         }
1667bf215546Sopenharmony_ci         i->setSrc(0, bld.mkImm(res.data.f32));
1668bf215546Sopenharmony_ci         break;
1669bf215546Sopenharmony_ci      case TYPE_F64:
1670bf215546Sopenharmony_ci         switch (i->sType) {
1671bf215546Sopenharmony_ci         case TYPE_F64:
1672bf215546Sopenharmony_ci            res.data.f64 = i->saturate ?
1673bf215546Sopenharmony_ci               SATURATE(imm0.reg.data.f64) :
1674bf215546Sopenharmony_ci               imm0.reg.data.f64;
1675bf215546Sopenharmony_ci            break;
1676bf215546Sopenharmony_ci         case TYPE_F32:
1677bf215546Sopenharmony_ci            res.data.f64 = i->saturate ?
1678bf215546Sopenharmony_ci               SATURATE(imm0.reg.data.f32) :
1679bf215546Sopenharmony_ci               imm0.reg.data.f32;
1680bf215546Sopenharmony_ci            break;
1681bf215546Sopenharmony_ci         case TYPE_U16: res.data.f64 = (double) imm0.reg.data.u16; break;
1682bf215546Sopenharmony_ci         case TYPE_U32: res.data.f64 = (double) imm0.reg.data.u32; break;
1683bf215546Sopenharmony_ci         case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break;
1684bf215546Sopenharmony_ci         case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break;
1685bf215546Sopenharmony_ci         default:
1686bf215546Sopenharmony_ci            return false;
1687bf215546Sopenharmony_ci         }
1688bf215546Sopenharmony_ci         i->setSrc(0, bld.mkImm(res.data.f64));
1689bf215546Sopenharmony_ci         break;
1690bf215546Sopenharmony_ci      default:
1691bf215546Sopenharmony_ci         return false;
1692bf215546Sopenharmony_ci      }
1693bf215546Sopenharmony_ci#undef CASE
1694bf215546Sopenharmony_ci
1695bf215546Sopenharmony_ci      i->setType(i->dType); /* Remove i->sType, which we don't need anymore */
1696bf215546Sopenharmony_ci      i->op = OP_MOV;
1697bf215546Sopenharmony_ci      i->saturate = 0;
1698bf215546Sopenharmony_ci      i->src(0).mod = Modifier(0); /* Clear the already applied modifier */
1699bf215546Sopenharmony_ci      break;
1700bf215546Sopenharmony_ci   }
1701bf215546Sopenharmony_ci   default:
1702bf215546Sopenharmony_ci      return false;
1703bf215546Sopenharmony_ci   }
1704bf215546Sopenharmony_ci
1705bf215546Sopenharmony_ci   // This can get left behind some of the optimizations which simplify
1706bf215546Sopenharmony_ci   // saturatable values.
1707bf215546Sopenharmony_ci   if (newi->op == OP_MOV && newi->saturate) {
1708bf215546Sopenharmony_ci      ImmediateValue tmp;
1709bf215546Sopenharmony_ci      newi->saturate = 0;
1710bf215546Sopenharmony_ci      newi->op = OP_SAT;
1711bf215546Sopenharmony_ci      if (newi->src(0).getImmediate(tmp))
1712bf215546Sopenharmony_ci         unary(newi, tmp);
1713bf215546Sopenharmony_ci   }
1714bf215546Sopenharmony_ci
1715bf215546Sopenharmony_ci   if (newi->op != op)
1716bf215546Sopenharmony_ci      foldCount++;
1717bf215546Sopenharmony_ci   return deleted;
1718bf215546Sopenharmony_ci}
1719bf215546Sopenharmony_ci
1720bf215546Sopenharmony_ci// =============================================================================
1721bf215546Sopenharmony_ci
1722bf215546Sopenharmony_ci// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed.
1723bf215546Sopenharmony_ciclass ModifierFolding : public Pass
1724bf215546Sopenharmony_ci{
1725bf215546Sopenharmony_ciprivate:
1726bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
1727bf215546Sopenharmony_ci};
1728bf215546Sopenharmony_ci
1729bf215546Sopenharmony_cibool
1730bf215546Sopenharmony_ciModifierFolding::visit(BasicBlock *bb)
1731bf215546Sopenharmony_ci{
1732bf215546Sopenharmony_ci   const Target *target = prog->getTarget();
1733bf215546Sopenharmony_ci
1734bf215546Sopenharmony_ci   Instruction *i, *next, *mi;
1735bf215546Sopenharmony_ci   Modifier mod;
1736bf215546Sopenharmony_ci
1737bf215546Sopenharmony_ci   for (i = bb->getEntry(); i; i = next) {
1738bf215546Sopenharmony_ci      next = i->next;
1739bf215546Sopenharmony_ci
1740bf215546Sopenharmony_ci      if (false && i->op == OP_SUB) {
1741bf215546Sopenharmony_ci         // turn "sub" into "add neg" (do we really want this ?)
1742bf215546Sopenharmony_ci         i->op = OP_ADD;
1743bf215546Sopenharmony_ci         i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG);
1744bf215546Sopenharmony_ci      }
1745bf215546Sopenharmony_ci
1746bf215546Sopenharmony_ci      for (int s = 0; s < 3 && i->srcExists(s); ++s) {
1747bf215546Sopenharmony_ci         mi = i->getSrc(s)->getInsn();
1748bf215546Sopenharmony_ci         if (!mi ||
1749bf215546Sopenharmony_ci             mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8)
1750bf215546Sopenharmony_ci            continue;
1751bf215546Sopenharmony_ci         if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) {
1752bf215546Sopenharmony_ci            if ((i->op != OP_ADD &&
1753bf215546Sopenharmony_ci                 i->op != OP_MUL) ||
1754bf215546Sopenharmony_ci                (mi->op != OP_ABS &&
1755bf215546Sopenharmony_ci                 mi->op != OP_NEG))
1756bf215546Sopenharmony_ci               continue;
1757bf215546Sopenharmony_ci         } else
1758bf215546Sopenharmony_ci         if (i->sType != mi->dType) {
1759bf215546Sopenharmony_ci            continue;
1760bf215546Sopenharmony_ci         }
1761bf215546Sopenharmony_ci         if ((mod = Modifier(mi->op)) == Modifier(0))
1762bf215546Sopenharmony_ci            continue;
1763bf215546Sopenharmony_ci         mod *= mi->src(0).mod;
1764bf215546Sopenharmony_ci
1765bf215546Sopenharmony_ci         if ((i->op == OP_ABS) || i->src(s).mod.abs()) {
1766bf215546Sopenharmony_ci            // abs neg [abs] = abs
1767bf215546Sopenharmony_ci            mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS));
1768bf215546Sopenharmony_ci         } else
1769bf215546Sopenharmony_ci         if ((i->op == OP_NEG) && mod.neg()) {
1770bf215546Sopenharmony_ci            assert(s == 0);
1771bf215546Sopenharmony_ci            // neg as both opcode and modifier on same insn is prohibited
1772bf215546Sopenharmony_ci            // neg neg abs = abs, neg neg = identity
1773bf215546Sopenharmony_ci            mod = mod & Modifier(~NV50_IR_MOD_NEG);
1774bf215546Sopenharmony_ci            i->op = mod.getOp();
1775bf215546Sopenharmony_ci            mod = mod & Modifier(~NV50_IR_MOD_ABS);
1776bf215546Sopenharmony_ci            if (mod == Modifier(0))
1777bf215546Sopenharmony_ci               i->op = OP_MOV;
1778bf215546Sopenharmony_ci         }
1779bf215546Sopenharmony_ci
1780bf215546Sopenharmony_ci         if (target->isModSupported(i, s, mod)) {
1781bf215546Sopenharmony_ci            i->setSrc(s, mi->getSrc(0));
1782bf215546Sopenharmony_ci            i->src(s).mod *= mod;
1783bf215546Sopenharmony_ci         }
1784bf215546Sopenharmony_ci      }
1785bf215546Sopenharmony_ci
1786bf215546Sopenharmony_ci      if (i->op == OP_SAT) {
1787bf215546Sopenharmony_ci         mi = i->getSrc(0)->getInsn();
1788bf215546Sopenharmony_ci         if (mi &&
1789bf215546Sopenharmony_ci             mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) {
1790bf215546Sopenharmony_ci            mi->saturate = 1;
1791bf215546Sopenharmony_ci            mi->setDef(0, i->getDef(0));
1792bf215546Sopenharmony_ci            delete_Instruction(prog, i);
1793bf215546Sopenharmony_ci         }
1794bf215546Sopenharmony_ci      }
1795bf215546Sopenharmony_ci   }
1796bf215546Sopenharmony_ci
1797bf215546Sopenharmony_ci   return true;
1798bf215546Sopenharmony_ci}
1799bf215546Sopenharmony_ci
1800bf215546Sopenharmony_ci// =============================================================================
1801bf215546Sopenharmony_ci
1802bf215546Sopenharmony_ci// MUL + ADD -> MAD/FMA
1803bf215546Sopenharmony_ci// MIN/MAX(a, a) -> a, etc.
1804bf215546Sopenharmony_ci// SLCT(a, b, const) -> cc(const) ? a : b
1805bf215546Sopenharmony_ci// RCP(RCP(a)) -> a
1806bf215546Sopenharmony_ci// MUL(MUL(a, b), const) -> MUL_Xconst(a, b)
1807bf215546Sopenharmony_ci// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID)
1808bf215546Sopenharmony_ciclass AlgebraicOpt : public Pass
1809bf215546Sopenharmony_ci{
1810bf215546Sopenharmony_ciprivate:
1811bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
1812bf215546Sopenharmony_ci
1813bf215546Sopenharmony_ci   void handleABS(Instruction *);
1814bf215546Sopenharmony_ci   bool handleADD(Instruction *);
1815bf215546Sopenharmony_ci   bool tryADDToMADOrSAD(Instruction *, operation toOp);
1816bf215546Sopenharmony_ci   void handleMINMAX(Instruction *);
1817bf215546Sopenharmony_ci   void handleRCP(Instruction *);
1818bf215546Sopenharmony_ci   void handleSLCT(Instruction *);
1819bf215546Sopenharmony_ci   void handleLOGOP(Instruction *);
1820bf215546Sopenharmony_ci   void handleCVT_NEG(Instruction *);
1821bf215546Sopenharmony_ci   void handleCVT_CVT(Instruction *);
1822bf215546Sopenharmony_ci   void handleCVT_EXTBF(Instruction *);
1823bf215546Sopenharmony_ci   void handleSUCLAMP(Instruction *);
1824bf215546Sopenharmony_ci   void handleNEG(Instruction *);
1825bf215546Sopenharmony_ci   void handleEXTBF_RDSV(Instruction *);
1826bf215546Sopenharmony_ci
1827bf215546Sopenharmony_ci   BuildUtil bld;
1828bf215546Sopenharmony_ci};
1829bf215546Sopenharmony_ci
1830bf215546Sopenharmony_civoid
1831bf215546Sopenharmony_ciAlgebraicOpt::handleABS(Instruction *abs)
1832bf215546Sopenharmony_ci{
1833bf215546Sopenharmony_ci   Instruction *sub = abs->getSrc(0)->getInsn();
1834bf215546Sopenharmony_ci   DataType ty;
1835bf215546Sopenharmony_ci   if (!sub ||
1836bf215546Sopenharmony_ci       !prog->getTarget()->isOpSupported(OP_SAD, abs->dType))
1837bf215546Sopenharmony_ci      return;
1838bf215546Sopenharmony_ci   // hidden conversion ?
1839bf215546Sopenharmony_ci   ty = intTypeToSigned(sub->dType);
1840bf215546Sopenharmony_ci   if (abs->dType != abs->sType || ty != abs->sType)
1841bf215546Sopenharmony_ci      return;
1842bf215546Sopenharmony_ci
1843bf215546Sopenharmony_ci   if ((sub->op != OP_ADD && sub->op != OP_SUB) ||
1844bf215546Sopenharmony_ci       sub->src(0).getFile() != FILE_GPR || sub->src(0).mod ||
1845bf215546Sopenharmony_ci       sub->src(1).getFile() != FILE_GPR || sub->src(1).mod)
1846bf215546Sopenharmony_ci         return;
1847bf215546Sopenharmony_ci
1848bf215546Sopenharmony_ci   Value *src0 = sub->getSrc(0);
1849bf215546Sopenharmony_ci   Value *src1 = sub->getSrc(1);
1850bf215546Sopenharmony_ci
1851bf215546Sopenharmony_ci   if (sub->op == OP_ADD) {
1852bf215546Sopenharmony_ci      Instruction *neg = sub->getSrc(1)->getInsn();
1853bf215546Sopenharmony_ci      if (neg && neg->op != OP_NEG) {
1854bf215546Sopenharmony_ci         neg = sub->getSrc(0)->getInsn();
1855bf215546Sopenharmony_ci         src0 = sub->getSrc(1);
1856bf215546Sopenharmony_ci      }
1857bf215546Sopenharmony_ci      if (!neg || neg->op != OP_NEG ||
1858bf215546Sopenharmony_ci          neg->dType != neg->sType || neg->sType != ty)
1859bf215546Sopenharmony_ci         return;
1860bf215546Sopenharmony_ci      src1 = neg->getSrc(0);
1861bf215546Sopenharmony_ci   }
1862bf215546Sopenharmony_ci
1863bf215546Sopenharmony_ci   // found ABS(SUB))
1864bf215546Sopenharmony_ci   abs->moveSources(1, 2); // move sources >=1 up by 2
1865bf215546Sopenharmony_ci   abs->op = OP_SAD;
1866bf215546Sopenharmony_ci   abs->setType(sub->dType);
1867bf215546Sopenharmony_ci   abs->setSrc(0, src0);
1868bf215546Sopenharmony_ci   abs->setSrc(1, src1);
1869bf215546Sopenharmony_ci   bld.setPosition(abs, false);
1870bf215546Sopenharmony_ci   abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0));
1871bf215546Sopenharmony_ci}
1872bf215546Sopenharmony_ci
1873bf215546Sopenharmony_cibool
1874bf215546Sopenharmony_ciAlgebraicOpt::handleADD(Instruction *add)
1875bf215546Sopenharmony_ci{
1876bf215546Sopenharmony_ci   Value *src0 = add->getSrc(0);
1877bf215546Sopenharmony_ci   Value *src1 = add->getSrc(1);
1878bf215546Sopenharmony_ci
1879bf215546Sopenharmony_ci   if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
1880bf215546Sopenharmony_ci      return false;
1881bf215546Sopenharmony_ci
1882bf215546Sopenharmony_ci   bool changed = false;
1883bf215546Sopenharmony_ci   // we can't optimize to MAD if the add is precise
1884bf215546Sopenharmony_ci   if (!add->precise && prog->getTarget()->isOpSupported(OP_MAD, add->dType))
1885bf215546Sopenharmony_ci      changed = tryADDToMADOrSAD(add, OP_MAD);
1886bf215546Sopenharmony_ci   if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType))
1887bf215546Sopenharmony_ci      changed = tryADDToMADOrSAD(add, OP_SAD);
1888bf215546Sopenharmony_ci   return changed;
1889bf215546Sopenharmony_ci}
1890bf215546Sopenharmony_ci
1891bf215546Sopenharmony_ci// ADD(SAD(a,b,0), c) -> SAD(a,b,c)
1892bf215546Sopenharmony_ci// ADD(MUL(a,b), c) -> MAD(a,b,c)
1893bf215546Sopenharmony_cibool
1894bf215546Sopenharmony_ciAlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp)
1895bf215546Sopenharmony_ci{
1896bf215546Sopenharmony_ci   Value *src0 = add->getSrc(0);
1897bf215546Sopenharmony_ci   Value *src1 = add->getSrc(1);
1898bf215546Sopenharmony_ci   Value *src;
1899bf215546Sopenharmony_ci   int s;
1900bf215546Sopenharmony_ci   const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL;
1901bf215546Sopenharmony_ci   const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0));
1902bf215546Sopenharmony_ci   Modifier mod[4];
1903bf215546Sopenharmony_ci
1904bf215546Sopenharmony_ci   if (src0->refCount() == 1 &&
1905bf215546Sopenharmony_ci       src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp)
1906bf215546Sopenharmony_ci      s = 0;
1907bf215546Sopenharmony_ci   else
1908bf215546Sopenharmony_ci   if (src1->refCount() == 1 &&
1909bf215546Sopenharmony_ci       src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp)
1910bf215546Sopenharmony_ci      s = 1;
1911bf215546Sopenharmony_ci   else
1912bf215546Sopenharmony_ci      return false;
1913bf215546Sopenharmony_ci
1914bf215546Sopenharmony_ci   src = add->getSrc(s);
1915bf215546Sopenharmony_ci
1916bf215546Sopenharmony_ci   if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb)
1917bf215546Sopenharmony_ci      return false;
1918bf215546Sopenharmony_ci
1919bf215546Sopenharmony_ci   if (src->getInsn()->saturate || src->getInsn()->postFactor ||
1920bf215546Sopenharmony_ci       src->getInsn()->dnz || src->getInsn()->precise)
1921bf215546Sopenharmony_ci      return false;
1922bf215546Sopenharmony_ci
1923bf215546Sopenharmony_ci   if (toOp == OP_SAD) {
1924bf215546Sopenharmony_ci      ImmediateValue imm;
1925bf215546Sopenharmony_ci      if (!src->getInsn()->src(2).getImmediate(imm))
1926bf215546Sopenharmony_ci         return false;
1927bf215546Sopenharmony_ci      if (!imm.isInteger(0))
1928bf215546Sopenharmony_ci         return false;
1929bf215546Sopenharmony_ci   }
1930bf215546Sopenharmony_ci
1931bf215546Sopenharmony_ci   if (typeSizeof(add->dType) != typeSizeof(src->getInsn()->dType) ||
1932bf215546Sopenharmony_ci       isFloatType(add->dType) != isFloatType(src->getInsn()->dType))
1933bf215546Sopenharmony_ci      return false;
1934bf215546Sopenharmony_ci
1935bf215546Sopenharmony_ci   mod[0] = add->src(0).mod;
1936bf215546Sopenharmony_ci   mod[1] = add->src(1).mod;
1937bf215546Sopenharmony_ci   mod[2] = src->getUniqueInsn()->src(0).mod;
1938bf215546Sopenharmony_ci   mod[3] = src->getUniqueInsn()->src(1).mod;
1939bf215546Sopenharmony_ci
1940bf215546Sopenharmony_ci   if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad)
1941bf215546Sopenharmony_ci      return false;
1942bf215546Sopenharmony_ci
1943bf215546Sopenharmony_ci   add->op = toOp;
1944bf215546Sopenharmony_ci   add->subOp = src->getInsn()->subOp; // potentially mul-high
1945bf215546Sopenharmony_ci   add->dnz = src->getInsn()->dnz;
1946bf215546Sopenharmony_ci   add->dType = src->getInsn()->dType; // sign matters for imad hi
1947bf215546Sopenharmony_ci   add->sType = src->getInsn()->sType;
1948bf215546Sopenharmony_ci
1949bf215546Sopenharmony_ci   add->setSrc(2, add->src(s ? 0 : 1));
1950bf215546Sopenharmony_ci
1951bf215546Sopenharmony_ci   add->setSrc(0, src->getInsn()->getSrc(0));
1952bf215546Sopenharmony_ci   add->src(0).mod = mod[2] ^ mod[s];
1953bf215546Sopenharmony_ci   add->setSrc(1, src->getInsn()->getSrc(1));
1954bf215546Sopenharmony_ci   add->src(1).mod = mod[3];
1955bf215546Sopenharmony_ci
1956bf215546Sopenharmony_ci   return true;
1957bf215546Sopenharmony_ci}
1958bf215546Sopenharmony_ci
1959bf215546Sopenharmony_civoid
1960bf215546Sopenharmony_ciAlgebraicOpt::handleMINMAX(Instruction *minmax)
1961bf215546Sopenharmony_ci{
1962bf215546Sopenharmony_ci   Value *src0 = minmax->getSrc(0);
1963bf215546Sopenharmony_ci   Value *src1 = minmax->getSrc(1);
1964bf215546Sopenharmony_ci
1965bf215546Sopenharmony_ci   if (src0 != src1 || src0->reg.file != FILE_GPR)
1966bf215546Sopenharmony_ci      return;
1967bf215546Sopenharmony_ci   if (minmax->src(0).mod == minmax->src(1).mod) {
1968bf215546Sopenharmony_ci      if (minmax->def(0).mayReplace(minmax->src(0))) {
1969bf215546Sopenharmony_ci         minmax->def(0).replace(minmax->src(0), false);
1970bf215546Sopenharmony_ci         delete_Instruction(prog, minmax);
1971bf215546Sopenharmony_ci      } else {
1972bf215546Sopenharmony_ci         minmax->op = OP_CVT;
1973bf215546Sopenharmony_ci         minmax->setSrc(1, NULL);
1974bf215546Sopenharmony_ci      }
1975bf215546Sopenharmony_ci   } else {
1976bf215546Sopenharmony_ci      // TODO:
1977bf215546Sopenharmony_ci      // min(x, -x) = -abs(x)
1978bf215546Sopenharmony_ci      // min(x, -abs(x)) = -abs(x)
1979bf215546Sopenharmony_ci      // min(x, abs(x)) = x
1980bf215546Sopenharmony_ci      // max(x, -abs(x)) = x
1981bf215546Sopenharmony_ci      // max(x, abs(x)) = abs(x)
1982bf215546Sopenharmony_ci      // max(x, -x) = abs(x)
1983bf215546Sopenharmony_ci   }
1984bf215546Sopenharmony_ci}
1985bf215546Sopenharmony_ci
1986bf215546Sopenharmony_ci// rcp(rcp(a)) = a
1987bf215546Sopenharmony_ci// rcp(sqrt(a)) = rsq(a)
1988bf215546Sopenharmony_civoid
1989bf215546Sopenharmony_ciAlgebraicOpt::handleRCP(Instruction *rcp)
1990bf215546Sopenharmony_ci{
1991bf215546Sopenharmony_ci   Instruction *si = rcp->getSrc(0)->getUniqueInsn();
1992bf215546Sopenharmony_ci
1993bf215546Sopenharmony_ci   if (!si)
1994bf215546Sopenharmony_ci      return;
1995bf215546Sopenharmony_ci
1996bf215546Sopenharmony_ci   if (si->op == OP_RCP) {
1997bf215546Sopenharmony_ci      Modifier mod = rcp->src(0).mod * si->src(0).mod;
1998bf215546Sopenharmony_ci      rcp->op = mod.getOp();
1999bf215546Sopenharmony_ci      rcp->setSrc(0, si->getSrc(0));
2000bf215546Sopenharmony_ci   } else if (si->op == OP_SQRT) {
2001bf215546Sopenharmony_ci      rcp->op = OP_RSQ;
2002bf215546Sopenharmony_ci      rcp->setSrc(0, si->getSrc(0));
2003bf215546Sopenharmony_ci      rcp->src(0).mod = rcp->src(0).mod * si->src(0).mod;
2004bf215546Sopenharmony_ci   }
2005bf215546Sopenharmony_ci}
2006bf215546Sopenharmony_ci
2007bf215546Sopenharmony_civoid
2008bf215546Sopenharmony_ciAlgebraicOpt::handleSLCT(Instruction *slct)
2009bf215546Sopenharmony_ci{
2010bf215546Sopenharmony_ci   if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) {
2011bf215546Sopenharmony_ci      if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f))
2012bf215546Sopenharmony_ci         slct->setSrc(0, slct->getSrc(1));
2013bf215546Sopenharmony_ci   } else
2014bf215546Sopenharmony_ci   if (slct->getSrc(0) != slct->getSrc(1)) {
2015bf215546Sopenharmony_ci      return;
2016bf215546Sopenharmony_ci   }
2017bf215546Sopenharmony_ci   slct->op = OP_MOV;
2018bf215546Sopenharmony_ci   slct->setSrc(1, NULL);
2019bf215546Sopenharmony_ci   slct->setSrc(2, NULL);
2020bf215546Sopenharmony_ci}
2021bf215546Sopenharmony_ci
2022bf215546Sopenharmony_civoid
2023bf215546Sopenharmony_ciAlgebraicOpt::handleLOGOP(Instruction *logop)
2024bf215546Sopenharmony_ci{
2025bf215546Sopenharmony_ci   Value *src0 = logop->getSrc(0);
2026bf215546Sopenharmony_ci   Value *src1 = logop->getSrc(1);
2027bf215546Sopenharmony_ci
2028bf215546Sopenharmony_ci   if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
2029bf215546Sopenharmony_ci      return;
2030bf215546Sopenharmony_ci
2031bf215546Sopenharmony_ci   if (src0 == src1) {
2032bf215546Sopenharmony_ci      if ((logop->op == OP_AND || logop->op == OP_OR) &&
2033bf215546Sopenharmony_ci          logop->def(0).mayReplace(logop->src(0))) {
2034bf215546Sopenharmony_ci         logop->def(0).replace(logop->src(0), false);
2035bf215546Sopenharmony_ci         delete_Instruction(prog, logop);
2036bf215546Sopenharmony_ci      }
2037bf215546Sopenharmony_ci   } else {
2038bf215546Sopenharmony_ci      // try AND(SET, SET) -> SET_AND(SET)
2039bf215546Sopenharmony_ci      Instruction *set0 = src0->getInsn();
2040bf215546Sopenharmony_ci      Instruction *set1 = src1->getInsn();
2041bf215546Sopenharmony_ci
2042bf215546Sopenharmony_ci      if (!set0 || set0->fixed || !set1 || set1->fixed)
2043bf215546Sopenharmony_ci         return;
2044bf215546Sopenharmony_ci      if (set1->op != OP_SET) {
2045bf215546Sopenharmony_ci         Instruction *xchg = set0;
2046bf215546Sopenharmony_ci         set0 = set1;
2047bf215546Sopenharmony_ci         set1 = xchg;
2048bf215546Sopenharmony_ci         if (set1->op != OP_SET)
2049bf215546Sopenharmony_ci            return;
2050bf215546Sopenharmony_ci      }
2051bf215546Sopenharmony_ci      operation redOp = (logop->op == OP_AND ? OP_SET_AND :
2052bf215546Sopenharmony_ci                         logop->op == OP_XOR ? OP_SET_XOR : OP_SET_OR);
2053bf215546Sopenharmony_ci      if (!prog->getTarget()->isOpSupported(redOp, set1->sType))
2054bf215546Sopenharmony_ci         return;
2055bf215546Sopenharmony_ci      if (set0->op != OP_SET &&
2056bf215546Sopenharmony_ci          set0->op != OP_SET_AND &&
2057bf215546Sopenharmony_ci          set0->op != OP_SET_OR &&
2058bf215546Sopenharmony_ci          set0->op != OP_SET_XOR)
2059bf215546Sopenharmony_ci         return;
2060bf215546Sopenharmony_ci      if (set0->getDef(0)->refCount() > 1 &&
2061bf215546Sopenharmony_ci          set1->getDef(0)->refCount() > 1)
2062bf215546Sopenharmony_ci         return;
2063bf215546Sopenharmony_ci      if (set0->getPredicate() || set1->getPredicate())
2064bf215546Sopenharmony_ci         return;
2065bf215546Sopenharmony_ci      // check that they don't source each other
2066bf215546Sopenharmony_ci      for (int s = 0; s < 2; ++s)
2067bf215546Sopenharmony_ci         if (set0->getSrc(s) == set1->getDef(0) ||
2068bf215546Sopenharmony_ci             set1->getSrc(s) == set0->getDef(0))
2069bf215546Sopenharmony_ci            return;
2070bf215546Sopenharmony_ci
2071bf215546Sopenharmony_ci      set0 = cloneForward(func, set0);
2072bf215546Sopenharmony_ci      set1 = cloneShallow(func, set1);
2073bf215546Sopenharmony_ci      logop->bb->insertAfter(logop, set1);
2074bf215546Sopenharmony_ci      logop->bb->insertAfter(logop, set0);
2075bf215546Sopenharmony_ci
2076bf215546Sopenharmony_ci      set0->dType = TYPE_U8;
2077bf215546Sopenharmony_ci      set0->getDef(0)->reg.file = FILE_PREDICATE;
2078bf215546Sopenharmony_ci      set0->getDef(0)->reg.size = 1;
2079bf215546Sopenharmony_ci      set1->setSrc(2, set0->getDef(0));
2080bf215546Sopenharmony_ci      set1->op = redOp;
2081bf215546Sopenharmony_ci      set1->setDef(0, logop->getDef(0));
2082bf215546Sopenharmony_ci      delete_Instruction(prog, logop);
2083bf215546Sopenharmony_ci   }
2084bf215546Sopenharmony_ci}
2085bf215546Sopenharmony_ci
2086bf215546Sopenharmony_ci// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0
2087bf215546Sopenharmony_ci// nv50:
2088bf215546Sopenharmony_ci//  F2I(NEG(I2F(ABS(SET))))
2089bf215546Sopenharmony_civoid
2090bf215546Sopenharmony_ciAlgebraicOpt::handleCVT_NEG(Instruction *cvt)
2091bf215546Sopenharmony_ci{
2092bf215546Sopenharmony_ci   Instruction *insn = cvt->getSrc(0)->getInsn();
2093bf215546Sopenharmony_ci   if (cvt->sType != TYPE_F32 ||
2094bf215546Sopenharmony_ci       cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0))
2095bf215546Sopenharmony_ci      return;
2096bf215546Sopenharmony_ci   if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32)
2097bf215546Sopenharmony_ci      return;
2098bf215546Sopenharmony_ci   if (insn->src(0).mod != Modifier(0))
2099bf215546Sopenharmony_ci      return;
2100bf215546Sopenharmony_ci   insn = insn->getSrc(0)->getInsn();
2101bf215546Sopenharmony_ci
2102bf215546Sopenharmony_ci   // check for nv50 SET(-1,0) -> SET(1.0f/0.0f) chain and nvc0's f32 SET
2103bf215546Sopenharmony_ci   if (insn && insn->op == OP_CVT &&
2104bf215546Sopenharmony_ci       insn->dType == TYPE_F32 &&
2105bf215546Sopenharmony_ci       insn->sType == TYPE_S32) {
2106bf215546Sopenharmony_ci      insn = insn->getSrc(0)->getInsn();
2107bf215546Sopenharmony_ci      if (!insn || insn->op != OP_ABS || insn->sType != TYPE_S32 ||
2108bf215546Sopenharmony_ci          insn->src(0).mod)
2109bf215546Sopenharmony_ci         return;
2110bf215546Sopenharmony_ci      insn = insn->getSrc(0)->getInsn();
2111bf215546Sopenharmony_ci      if (!insn || insn->op != OP_SET || insn->dType != TYPE_U32)
2112bf215546Sopenharmony_ci         return;
2113bf215546Sopenharmony_ci   } else
2114bf215546Sopenharmony_ci   if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32) {
2115bf215546Sopenharmony_ci      return;
2116bf215546Sopenharmony_ci   }
2117bf215546Sopenharmony_ci
2118bf215546Sopenharmony_ci   Instruction *bset = cloneShallow(func, insn);
2119bf215546Sopenharmony_ci   bset->dType = TYPE_U32;
2120bf215546Sopenharmony_ci   bset->setDef(0, cvt->getDef(0));
2121bf215546Sopenharmony_ci   cvt->bb->insertAfter(cvt, bset);
2122bf215546Sopenharmony_ci   delete_Instruction(prog, cvt);
2123bf215546Sopenharmony_ci}
2124bf215546Sopenharmony_ci
2125bf215546Sopenharmony_ci// F2I(TRUNC()) and so on can be expressed as a single CVT. If the earlier CVT
2126bf215546Sopenharmony_ci// does a type conversion, this becomes trickier as there might be range
2127bf215546Sopenharmony_ci// changes/etc. We could handle those in theory as long as the range was being
2128bf215546Sopenharmony_ci// reduced or kept the same.
2129bf215546Sopenharmony_civoid
2130bf215546Sopenharmony_ciAlgebraicOpt::handleCVT_CVT(Instruction *cvt)
2131bf215546Sopenharmony_ci{
2132bf215546Sopenharmony_ci   Instruction *insn = cvt->getSrc(0)->getInsn();
2133bf215546Sopenharmony_ci
2134bf215546Sopenharmony_ci   if (!insn ||
2135bf215546Sopenharmony_ci       insn->saturate ||
2136bf215546Sopenharmony_ci       insn->subOp ||
2137bf215546Sopenharmony_ci       insn->dType != insn->sType ||
2138bf215546Sopenharmony_ci       insn->dType != cvt->sType)
2139bf215546Sopenharmony_ci      return;
2140bf215546Sopenharmony_ci
2141bf215546Sopenharmony_ci   RoundMode rnd = insn->rnd;
2142bf215546Sopenharmony_ci   switch (insn->op) {
2143bf215546Sopenharmony_ci   case OP_CEIL:
2144bf215546Sopenharmony_ci      rnd = ROUND_PI;
2145bf215546Sopenharmony_ci      break;
2146bf215546Sopenharmony_ci   case OP_FLOOR:
2147bf215546Sopenharmony_ci      rnd = ROUND_MI;
2148bf215546Sopenharmony_ci      break;
2149bf215546Sopenharmony_ci   case OP_TRUNC:
2150bf215546Sopenharmony_ci      rnd = ROUND_ZI;
2151bf215546Sopenharmony_ci      break;
2152bf215546Sopenharmony_ci   case OP_CVT:
2153bf215546Sopenharmony_ci      break;
2154bf215546Sopenharmony_ci   default:
2155bf215546Sopenharmony_ci      return;
2156bf215546Sopenharmony_ci   }
2157bf215546Sopenharmony_ci
2158bf215546Sopenharmony_ci   if (!isFloatType(cvt->dType) || !isFloatType(insn->sType))
2159bf215546Sopenharmony_ci      rnd = (RoundMode)(rnd & 3);
2160bf215546Sopenharmony_ci
2161bf215546Sopenharmony_ci   cvt->rnd = rnd;
2162bf215546Sopenharmony_ci   cvt->setSrc(0, insn->getSrc(0));
2163bf215546Sopenharmony_ci   cvt->src(0).mod *= insn->src(0).mod;
2164bf215546Sopenharmony_ci   cvt->sType = insn->sType;
2165bf215546Sopenharmony_ci}
2166bf215546Sopenharmony_ci
2167bf215546Sopenharmony_ci// Some shaders extract packed bytes out of words and convert them to
2168bf215546Sopenharmony_ci// e.g. float. The Fermi+ CVT instruction can extract those directly, as can
2169bf215546Sopenharmony_ci// nv50 for word sizes.
2170bf215546Sopenharmony_ci//
2171bf215546Sopenharmony_ci// CVT(EXTBF(x, byte/word))
2172bf215546Sopenharmony_ci// CVT(AND(bytemask, x))
2173bf215546Sopenharmony_ci// CVT(AND(bytemask, SHR(x, 8/16/24)))
2174bf215546Sopenharmony_ci// CVT(SHR(x, 16/24))
2175bf215546Sopenharmony_civoid
2176bf215546Sopenharmony_ciAlgebraicOpt::handleCVT_EXTBF(Instruction *cvt)
2177bf215546Sopenharmony_ci{
2178bf215546Sopenharmony_ci   Instruction *insn = cvt->getSrc(0)->getInsn();
2179bf215546Sopenharmony_ci   ImmediateValue imm;
2180bf215546Sopenharmony_ci   Value *arg = NULL;
2181bf215546Sopenharmony_ci   unsigned width, offset = 0;
2182bf215546Sopenharmony_ci   if ((cvt->sType != TYPE_U32 && cvt->sType != TYPE_S32) || !insn)
2183bf215546Sopenharmony_ci      return;
2184bf215546Sopenharmony_ci   if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm)) {
2185bf215546Sopenharmony_ci      width = (imm.reg.data.u32 >> 8) & 0xff;
2186bf215546Sopenharmony_ci      offset = imm.reg.data.u32 & 0xff;
2187bf215546Sopenharmony_ci      arg = insn->getSrc(0);
2188bf215546Sopenharmony_ci
2189bf215546Sopenharmony_ci      if (width != 8 && width != 16)
2190bf215546Sopenharmony_ci         return;
2191bf215546Sopenharmony_ci      if (width == 8 && offset & 0x7)
2192bf215546Sopenharmony_ci         return;
2193bf215546Sopenharmony_ci      if (width == 16 && offset & 0xf)
2194bf215546Sopenharmony_ci         return;
2195bf215546Sopenharmony_ci   } else if (insn->op == OP_AND) {
2196bf215546Sopenharmony_ci      int s;
2197bf215546Sopenharmony_ci      if (insn->src(0).getImmediate(imm))
2198bf215546Sopenharmony_ci         s = 0;
2199bf215546Sopenharmony_ci      else if (insn->src(1).getImmediate(imm))
2200bf215546Sopenharmony_ci         s = 1;
2201bf215546Sopenharmony_ci      else
2202bf215546Sopenharmony_ci         return;
2203bf215546Sopenharmony_ci
2204bf215546Sopenharmony_ci      if (imm.reg.data.u32 == 0xff)
2205bf215546Sopenharmony_ci         width = 8;
2206bf215546Sopenharmony_ci      else if (imm.reg.data.u32 == 0xffff)
2207bf215546Sopenharmony_ci         width = 16;
2208bf215546Sopenharmony_ci      else
2209bf215546Sopenharmony_ci         return;
2210bf215546Sopenharmony_ci
2211bf215546Sopenharmony_ci      arg = insn->getSrc(!s);
2212bf215546Sopenharmony_ci      Instruction *shift = arg->getInsn();
2213bf215546Sopenharmony_ci
2214bf215546Sopenharmony_ci      if (shift && shift->op == OP_SHR &&
2215bf215546Sopenharmony_ci          shift->sType == cvt->sType &&
2216bf215546Sopenharmony_ci          shift->src(1).getImmediate(imm) &&
2217bf215546Sopenharmony_ci          ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) ||
2218bf215546Sopenharmony_ci           (width == 16 && (imm.reg.data.u32 & 0xf) == 0))) {
2219bf215546Sopenharmony_ci         arg = shift->getSrc(0);
2220bf215546Sopenharmony_ci         offset = imm.reg.data.u32;
2221bf215546Sopenharmony_ci      }
2222bf215546Sopenharmony_ci      // We just AND'd the high bits away, which means this is effectively an
2223bf215546Sopenharmony_ci      // unsigned value.
2224bf215546Sopenharmony_ci      cvt->sType = TYPE_U32;
2225bf215546Sopenharmony_ci   } else if (insn->op == OP_SHR &&
2226bf215546Sopenharmony_ci              insn->sType == cvt->sType &&
2227bf215546Sopenharmony_ci              insn->src(1).getImmediate(imm)) {
2228bf215546Sopenharmony_ci      arg = insn->getSrc(0);
2229bf215546Sopenharmony_ci      if (imm.reg.data.u32 == 24) {
2230bf215546Sopenharmony_ci         width = 8;
2231bf215546Sopenharmony_ci         offset = 24;
2232bf215546Sopenharmony_ci      } else if (imm.reg.data.u32 == 16) {
2233bf215546Sopenharmony_ci         width = 16;
2234bf215546Sopenharmony_ci         offset = 16;
2235bf215546Sopenharmony_ci      } else {
2236bf215546Sopenharmony_ci         return;
2237bf215546Sopenharmony_ci      }
2238bf215546Sopenharmony_ci   }
2239bf215546Sopenharmony_ci
2240bf215546Sopenharmony_ci   if (!arg)
2241bf215546Sopenharmony_ci      return;
2242bf215546Sopenharmony_ci
2243bf215546Sopenharmony_ci   // Irrespective of what came earlier, we can undo a shift on the argument
2244bf215546Sopenharmony_ci   // by adjusting the offset.
2245bf215546Sopenharmony_ci   Instruction *shift = arg->getInsn();
2246bf215546Sopenharmony_ci   if (shift && shift->op == OP_SHL &&
2247bf215546Sopenharmony_ci       shift->src(1).getImmediate(imm) &&
2248bf215546Sopenharmony_ci       ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) ||
2249bf215546Sopenharmony_ci        (width == 16 && (imm.reg.data.u32 & 0xf) == 0)) &&
2250bf215546Sopenharmony_ci       imm.reg.data.u32 <= offset) {
2251bf215546Sopenharmony_ci      arg = shift->getSrc(0);
2252bf215546Sopenharmony_ci      offset -= imm.reg.data.u32;
2253bf215546Sopenharmony_ci   }
2254bf215546Sopenharmony_ci
2255bf215546Sopenharmony_ci   // The unpackSnorm lowering still leaves a few shifts behind, but it's too
2256bf215546Sopenharmony_ci   // annoying to detect them.
2257bf215546Sopenharmony_ci
2258bf215546Sopenharmony_ci   if (width == 8) {
2259bf215546Sopenharmony_ci      cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U8 : TYPE_S8;
2260bf215546Sopenharmony_ci   } else {
2261bf215546Sopenharmony_ci      assert(width == 16);
2262bf215546Sopenharmony_ci      cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U16 : TYPE_S16;
2263bf215546Sopenharmony_ci   }
2264bf215546Sopenharmony_ci   cvt->setSrc(0, arg);
2265bf215546Sopenharmony_ci   cvt->subOp = offset >> 3;
2266bf215546Sopenharmony_ci}
2267bf215546Sopenharmony_ci
2268bf215546Sopenharmony_ci// SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6)
2269bf215546Sopenharmony_civoid
2270bf215546Sopenharmony_ciAlgebraicOpt::handleSUCLAMP(Instruction *insn)
2271bf215546Sopenharmony_ci{
2272bf215546Sopenharmony_ci   ImmediateValue imm;
2273bf215546Sopenharmony_ci   int32_t val = insn->getSrc(2)->asImm()->reg.data.s32;
2274bf215546Sopenharmony_ci   int s;
2275bf215546Sopenharmony_ci   Instruction *add;
2276bf215546Sopenharmony_ci
2277bf215546Sopenharmony_ci   assert(insn->srcExists(0) && insn->src(0).getFile() == FILE_GPR);
2278bf215546Sopenharmony_ci
2279bf215546Sopenharmony_ci   // look for ADD (TODO: only count references by non-SUCLAMP)
2280bf215546Sopenharmony_ci   if (insn->getSrc(0)->refCount() > 1)
2281bf215546Sopenharmony_ci      return;
2282bf215546Sopenharmony_ci   add = insn->getSrc(0)->getInsn();
2283bf215546Sopenharmony_ci   if (!add || add->op != OP_ADD ||
2284bf215546Sopenharmony_ci       (add->dType != TYPE_U32 &&
2285bf215546Sopenharmony_ci        add->dType != TYPE_S32))
2286bf215546Sopenharmony_ci      return;
2287bf215546Sopenharmony_ci
2288bf215546Sopenharmony_ci   // look for immediate
2289bf215546Sopenharmony_ci   for (s = 0; s < 2; ++s)
2290bf215546Sopenharmony_ci      if (add->src(s).getImmediate(imm))
2291bf215546Sopenharmony_ci         break;
2292bf215546Sopenharmony_ci   if (s >= 2)
2293bf215546Sopenharmony_ci      return;
2294bf215546Sopenharmony_ci   s = s ? 0 : 1;
2295bf215546Sopenharmony_ci   // determine if immediate fits
2296bf215546Sopenharmony_ci   val += imm.reg.data.s32;
2297bf215546Sopenharmony_ci   if (val > 31 || val < -32)
2298bf215546Sopenharmony_ci      return;
2299bf215546Sopenharmony_ci   // determine if other addend fits
2300bf215546Sopenharmony_ci   if (add->src(s).getFile() != FILE_GPR || add->src(s).mod != Modifier(0))
2301bf215546Sopenharmony_ci      return;
2302bf215546Sopenharmony_ci
2303bf215546Sopenharmony_ci   bld.setPosition(insn, false); // make sure bld is init'ed
2304bf215546Sopenharmony_ci   // replace sources
2305bf215546Sopenharmony_ci   insn->setSrc(2, bld.mkImm(val));
2306bf215546Sopenharmony_ci   insn->setSrc(0, add->getSrc(s));
2307bf215546Sopenharmony_ci}
2308bf215546Sopenharmony_ci
2309bf215546Sopenharmony_ci// NEG(AND(SET, 1)) -> SET
2310bf215546Sopenharmony_civoid
2311bf215546Sopenharmony_ciAlgebraicOpt::handleNEG(Instruction *i) {
2312bf215546Sopenharmony_ci   Instruction *src = i->getSrc(0)->getInsn();
2313bf215546Sopenharmony_ci   ImmediateValue imm;
2314bf215546Sopenharmony_ci   int b;
2315bf215546Sopenharmony_ci
2316bf215546Sopenharmony_ci   if (isFloatType(i->sType) || !src || src->op != OP_AND)
2317bf215546Sopenharmony_ci      return;
2318bf215546Sopenharmony_ci
2319bf215546Sopenharmony_ci   if (src->src(0).getImmediate(imm))
2320bf215546Sopenharmony_ci      b = 1;
2321bf215546Sopenharmony_ci   else if (src->src(1).getImmediate(imm))
2322bf215546Sopenharmony_ci      b = 0;
2323bf215546Sopenharmony_ci   else
2324bf215546Sopenharmony_ci      return;
2325bf215546Sopenharmony_ci
2326bf215546Sopenharmony_ci   if (!imm.isInteger(1))
2327bf215546Sopenharmony_ci      return;
2328bf215546Sopenharmony_ci
2329bf215546Sopenharmony_ci   Instruction *set = src->getSrc(b)->getInsn();
2330bf215546Sopenharmony_ci   if ((set->op == OP_SET || set->op == OP_SET_AND ||
2331bf215546Sopenharmony_ci       set->op == OP_SET_OR || set->op == OP_SET_XOR) &&
2332bf215546Sopenharmony_ci       !isFloatType(set->dType)) {
2333bf215546Sopenharmony_ci      i->def(0).replace(set->getDef(0), false);
2334bf215546Sopenharmony_ci   }
2335bf215546Sopenharmony_ci}
2336bf215546Sopenharmony_ci
2337bf215546Sopenharmony_ci// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID)
2338bf215546Sopenharmony_civoid
2339bf215546Sopenharmony_ciAlgebraicOpt::handleEXTBF_RDSV(Instruction *i)
2340bf215546Sopenharmony_ci{
2341bf215546Sopenharmony_ci   Instruction *rdsv = i->getSrc(0)->getUniqueInsn();
2342bf215546Sopenharmony_ci   if (rdsv->op != OP_RDSV ||
2343bf215546Sopenharmony_ci       rdsv->getSrc(0)->asSym()->reg.data.sv.sv != SV_COMBINED_TID)
2344bf215546Sopenharmony_ci      return;
2345bf215546Sopenharmony_ci   // Avoid creating more RDSV instructions
2346bf215546Sopenharmony_ci   if (rdsv->getDef(0)->refCount() > 1)
2347bf215546Sopenharmony_ci      return;
2348bf215546Sopenharmony_ci
2349bf215546Sopenharmony_ci   ImmediateValue imm;
2350bf215546Sopenharmony_ci   if (!i->src(1).getImmediate(imm))
2351bf215546Sopenharmony_ci      return;
2352bf215546Sopenharmony_ci
2353bf215546Sopenharmony_ci   int index;
2354bf215546Sopenharmony_ci   if (imm.isInteger(0x1000))
2355bf215546Sopenharmony_ci      index = 0;
2356bf215546Sopenharmony_ci   else
2357bf215546Sopenharmony_ci   if (imm.isInteger(0x0a10))
2358bf215546Sopenharmony_ci      index = 1;
2359bf215546Sopenharmony_ci   else
2360bf215546Sopenharmony_ci   if (imm.isInteger(0x061a))
2361bf215546Sopenharmony_ci      index = 2;
2362bf215546Sopenharmony_ci   else
2363bf215546Sopenharmony_ci      return;
2364bf215546Sopenharmony_ci
2365bf215546Sopenharmony_ci   bld.setPosition(i, false);
2366bf215546Sopenharmony_ci
2367bf215546Sopenharmony_ci   i->op = OP_RDSV;
2368bf215546Sopenharmony_ci   i->setSrc(0, bld.mkSysVal(SV_TID, index));
2369bf215546Sopenharmony_ci   i->setSrc(1, NULL);
2370bf215546Sopenharmony_ci}
2371bf215546Sopenharmony_ci
2372bf215546Sopenharmony_cibool
2373bf215546Sopenharmony_ciAlgebraicOpt::visit(BasicBlock *bb)
2374bf215546Sopenharmony_ci{
2375bf215546Sopenharmony_ci   Instruction *next;
2376bf215546Sopenharmony_ci   for (Instruction *i = bb->getEntry(); i; i = next) {
2377bf215546Sopenharmony_ci      next = i->next;
2378bf215546Sopenharmony_ci      switch (i->op) {
2379bf215546Sopenharmony_ci      case OP_ABS:
2380bf215546Sopenharmony_ci         handleABS(i);
2381bf215546Sopenharmony_ci         break;
2382bf215546Sopenharmony_ci      case OP_ADD:
2383bf215546Sopenharmony_ci         handleADD(i);
2384bf215546Sopenharmony_ci         break;
2385bf215546Sopenharmony_ci      case OP_RCP:
2386bf215546Sopenharmony_ci         handleRCP(i);
2387bf215546Sopenharmony_ci         break;
2388bf215546Sopenharmony_ci      case OP_MIN:
2389bf215546Sopenharmony_ci      case OP_MAX:
2390bf215546Sopenharmony_ci         handleMINMAX(i);
2391bf215546Sopenharmony_ci         break;
2392bf215546Sopenharmony_ci      case OP_SLCT:
2393bf215546Sopenharmony_ci         handleSLCT(i);
2394bf215546Sopenharmony_ci         break;
2395bf215546Sopenharmony_ci      case OP_AND:
2396bf215546Sopenharmony_ci      case OP_OR:
2397bf215546Sopenharmony_ci      case OP_XOR:
2398bf215546Sopenharmony_ci         handleLOGOP(i);
2399bf215546Sopenharmony_ci         break;
2400bf215546Sopenharmony_ci      case OP_CVT:
2401bf215546Sopenharmony_ci         handleCVT_NEG(i);
2402bf215546Sopenharmony_ci         handleCVT_CVT(i);
2403bf215546Sopenharmony_ci         if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32))
2404bf215546Sopenharmony_ci             handleCVT_EXTBF(i);
2405bf215546Sopenharmony_ci         break;
2406bf215546Sopenharmony_ci      case OP_SUCLAMP:
2407bf215546Sopenharmony_ci         handleSUCLAMP(i);
2408bf215546Sopenharmony_ci         break;
2409bf215546Sopenharmony_ci      case OP_NEG:
2410bf215546Sopenharmony_ci         handleNEG(i);
2411bf215546Sopenharmony_ci         break;
2412bf215546Sopenharmony_ci      case OP_EXTBF:
2413bf215546Sopenharmony_ci         handleEXTBF_RDSV(i);
2414bf215546Sopenharmony_ci         break;
2415bf215546Sopenharmony_ci      default:
2416bf215546Sopenharmony_ci         break;
2417bf215546Sopenharmony_ci      }
2418bf215546Sopenharmony_ci   }
2419bf215546Sopenharmony_ci
2420bf215546Sopenharmony_ci   return true;
2421bf215546Sopenharmony_ci}
2422bf215546Sopenharmony_ci
2423bf215546Sopenharmony_ci// =============================================================================
2424bf215546Sopenharmony_ci
2425bf215546Sopenharmony_ci// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
2426bf215546Sopenharmony_ci// MUL(a, b) -> a few XMADs
2427bf215546Sopenharmony_ci// MAD/FMA(a, b, c) -> a few XMADs
2428bf215546Sopenharmony_ciclass LateAlgebraicOpt : public Pass
2429bf215546Sopenharmony_ci{
2430bf215546Sopenharmony_ciprivate:
2431bf215546Sopenharmony_ci   virtual bool visit(Instruction *);
2432bf215546Sopenharmony_ci
2433bf215546Sopenharmony_ci   void handleADD(Instruction *);
2434bf215546Sopenharmony_ci   void handleMULMAD(Instruction *);
2435bf215546Sopenharmony_ci   bool tryADDToSHLADD(Instruction *);
2436bf215546Sopenharmony_ci
2437bf215546Sopenharmony_ci   BuildUtil bld;
2438bf215546Sopenharmony_ci};
2439bf215546Sopenharmony_ci
2440bf215546Sopenharmony_civoid
2441bf215546Sopenharmony_ciLateAlgebraicOpt::handleADD(Instruction *add)
2442bf215546Sopenharmony_ci{
2443bf215546Sopenharmony_ci   Value *src0 = add->getSrc(0);
2444bf215546Sopenharmony_ci   Value *src1 = add->getSrc(1);
2445bf215546Sopenharmony_ci
2446bf215546Sopenharmony_ci   if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR)
2447bf215546Sopenharmony_ci      return;
2448bf215546Sopenharmony_ci
2449bf215546Sopenharmony_ci   if (prog->getTarget()->isOpSupported(OP_SHLADD, add->dType))
2450bf215546Sopenharmony_ci      tryADDToSHLADD(add);
2451bf215546Sopenharmony_ci}
2452bf215546Sopenharmony_ci
2453bf215546Sopenharmony_ci// ADD(SHL(a, b), c) -> SHLADD(a, b, c)
2454bf215546Sopenharmony_cibool
2455bf215546Sopenharmony_ciLateAlgebraicOpt::tryADDToSHLADD(Instruction *add)
2456bf215546Sopenharmony_ci{
2457bf215546Sopenharmony_ci   Value *src0 = add->getSrc(0);
2458bf215546Sopenharmony_ci   Value *src1 = add->getSrc(1);
2459bf215546Sopenharmony_ci   ImmediateValue imm;
2460bf215546Sopenharmony_ci   Instruction *shl;
2461bf215546Sopenharmony_ci   Value *src;
2462bf215546Sopenharmony_ci   int s;
2463bf215546Sopenharmony_ci
2464bf215546Sopenharmony_ci   if (add->saturate || add->usesFlags() || typeSizeof(add->dType) == 8
2465bf215546Sopenharmony_ci       || isFloatType(add->dType))
2466bf215546Sopenharmony_ci      return false;
2467bf215546Sopenharmony_ci
2468bf215546Sopenharmony_ci   if (src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_SHL)
2469bf215546Sopenharmony_ci      s = 0;
2470bf215546Sopenharmony_ci   else
2471bf215546Sopenharmony_ci   if (src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_SHL)
2472bf215546Sopenharmony_ci      s = 1;
2473bf215546Sopenharmony_ci   else
2474bf215546Sopenharmony_ci      return false;
2475bf215546Sopenharmony_ci
2476bf215546Sopenharmony_ci   src = add->getSrc(s);
2477bf215546Sopenharmony_ci   shl = src->getUniqueInsn();
2478bf215546Sopenharmony_ci
2479bf215546Sopenharmony_ci   if (shl->bb != add->bb || shl->usesFlags() || shl->subOp || shl->src(0).mod)
2480bf215546Sopenharmony_ci      return false;
2481bf215546Sopenharmony_ci
2482bf215546Sopenharmony_ci   if (!shl->src(1).getImmediate(imm))
2483bf215546Sopenharmony_ci      return false;
2484bf215546Sopenharmony_ci
2485bf215546Sopenharmony_ci   add->op = OP_SHLADD;
2486bf215546Sopenharmony_ci   add->setSrc(2, add->src(!s));
2487bf215546Sopenharmony_ci   // SHL can't have any modifiers, but the ADD source may have had
2488bf215546Sopenharmony_ci   // one. Preserve it.
2489bf215546Sopenharmony_ci   add->setSrc(0, shl->getSrc(0));
2490bf215546Sopenharmony_ci   if (s == 1)
2491bf215546Sopenharmony_ci      add->src(0).mod = add->src(1).mod;
2492bf215546Sopenharmony_ci   add->setSrc(1, new_ImmediateValue(shl->bb->getProgram(), imm.reg.data.u32));
2493bf215546Sopenharmony_ci   add->src(1).mod = Modifier(0);
2494bf215546Sopenharmony_ci
2495bf215546Sopenharmony_ci   return true;
2496bf215546Sopenharmony_ci}
2497bf215546Sopenharmony_ci
2498bf215546Sopenharmony_ci// MUL(a, b) -> a few XMADs
2499bf215546Sopenharmony_ci// MAD/FMA(a, b, c) -> a few XMADs
2500bf215546Sopenharmony_civoid
2501bf215546Sopenharmony_ciLateAlgebraicOpt::handleMULMAD(Instruction *i)
2502bf215546Sopenharmony_ci{
2503bf215546Sopenharmony_ci   // TODO: handle NV50_IR_SUBOP_MUL_HIGH
2504bf215546Sopenharmony_ci   if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32))
2505bf215546Sopenharmony_ci      return;
2506bf215546Sopenharmony_ci   if (isFloatType(i->dType) || typeSizeof(i->dType) != 4)
2507bf215546Sopenharmony_ci      return;
2508bf215546Sopenharmony_ci   if (i->subOp || i->usesFlags() || i->flagsDef >= 0)
2509bf215546Sopenharmony_ci      return;
2510bf215546Sopenharmony_ci
2511bf215546Sopenharmony_ci   assert(!i->src(0).mod);
2512bf215546Sopenharmony_ci   assert(!i->src(1).mod);
2513bf215546Sopenharmony_ci   assert(i->op == OP_MUL ? 1 : !i->src(2).mod);
2514bf215546Sopenharmony_ci
2515bf215546Sopenharmony_ci   bld.setPosition(i, false);
2516bf215546Sopenharmony_ci
2517bf215546Sopenharmony_ci   Value *a = i->getSrc(0);
2518bf215546Sopenharmony_ci   Value *b = i->getSrc(1);
2519bf215546Sopenharmony_ci   Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2);
2520bf215546Sopenharmony_ci
2521bf215546Sopenharmony_ci   Value *tmp0 = bld.getSSA();
2522bf215546Sopenharmony_ci   Value *tmp1 = bld.getSSA();
2523bf215546Sopenharmony_ci
2524bf215546Sopenharmony_ci   Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c);
2525bf215546Sopenharmony_ci   insn->setPredicate(i->cc, i->getPredicate());
2526bf215546Sopenharmony_ci
2527bf215546Sopenharmony_ci   insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0));
2528bf215546Sopenharmony_ci   insn->setPredicate(i->cc, i->getPredicate());
2529bf215546Sopenharmony_ci   insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1);
2530bf215546Sopenharmony_ci
2531bf215546Sopenharmony_ci   Value *pred = i->getPredicate();
2532bf215546Sopenharmony_ci   i->setPredicate(i->cc, NULL);
2533bf215546Sopenharmony_ci
2534bf215546Sopenharmony_ci   i->op = OP_XMAD;
2535bf215546Sopenharmony_ci   i->setSrc(0, b);
2536bf215546Sopenharmony_ci   i->setSrc(1, tmp1);
2537bf215546Sopenharmony_ci   i->setSrc(2, tmp0);
2538bf215546Sopenharmony_ci   i->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC;
2539bf215546Sopenharmony_ci   i->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1);
2540bf215546Sopenharmony_ci
2541bf215546Sopenharmony_ci   i->setPredicate(i->cc, pred);
2542bf215546Sopenharmony_ci}
2543bf215546Sopenharmony_ci
2544bf215546Sopenharmony_cibool
2545bf215546Sopenharmony_ciLateAlgebraicOpt::visit(Instruction *i)
2546bf215546Sopenharmony_ci{
2547bf215546Sopenharmony_ci   switch (i->op) {
2548bf215546Sopenharmony_ci   case OP_ADD:
2549bf215546Sopenharmony_ci      handleADD(i);
2550bf215546Sopenharmony_ci      break;
2551bf215546Sopenharmony_ci   case OP_MUL:
2552bf215546Sopenharmony_ci   case OP_MAD:
2553bf215546Sopenharmony_ci   case OP_FMA:
2554bf215546Sopenharmony_ci      handleMULMAD(i);
2555bf215546Sopenharmony_ci      break;
2556bf215546Sopenharmony_ci   default:
2557bf215546Sopenharmony_ci      break;
2558bf215546Sopenharmony_ci   }
2559bf215546Sopenharmony_ci
2560bf215546Sopenharmony_ci   return true;
2561bf215546Sopenharmony_ci}
2562bf215546Sopenharmony_ci
2563bf215546Sopenharmony_ci// =============================================================================
2564bf215546Sopenharmony_ci
2565bf215546Sopenharmony_ci// Split 64-bit MUL and MAD
2566bf215546Sopenharmony_ciclass Split64BitOpPreRA : public Pass
2567bf215546Sopenharmony_ci{
2568bf215546Sopenharmony_ciprivate:
2569bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
2570bf215546Sopenharmony_ci   void split64MulMad(Function *, Instruction *, DataType);
2571bf215546Sopenharmony_ci
2572bf215546Sopenharmony_ci   BuildUtil bld;
2573bf215546Sopenharmony_ci};
2574bf215546Sopenharmony_ci
2575bf215546Sopenharmony_cibool
2576bf215546Sopenharmony_ciSplit64BitOpPreRA::visit(BasicBlock *bb)
2577bf215546Sopenharmony_ci{
2578bf215546Sopenharmony_ci   Instruction *i, *next;
2579bf215546Sopenharmony_ci   Modifier mod;
2580bf215546Sopenharmony_ci
2581bf215546Sopenharmony_ci   for (i = bb->getEntry(); i; i = next) {
2582bf215546Sopenharmony_ci      next = i->next;
2583bf215546Sopenharmony_ci
2584bf215546Sopenharmony_ci      DataType hTy;
2585bf215546Sopenharmony_ci      switch (i->dType) {
2586bf215546Sopenharmony_ci      case TYPE_U64: hTy = TYPE_U32; break;
2587bf215546Sopenharmony_ci      case TYPE_S64: hTy = TYPE_S32; break;
2588bf215546Sopenharmony_ci      default:
2589bf215546Sopenharmony_ci         continue;
2590bf215546Sopenharmony_ci      }
2591bf215546Sopenharmony_ci
2592bf215546Sopenharmony_ci      if (i->op == OP_MAD || i->op == OP_MUL)
2593bf215546Sopenharmony_ci         split64MulMad(func, i, hTy);
2594bf215546Sopenharmony_ci   }
2595bf215546Sopenharmony_ci
2596bf215546Sopenharmony_ci   return true;
2597bf215546Sopenharmony_ci}
2598bf215546Sopenharmony_ci
2599bf215546Sopenharmony_civoid
2600bf215546Sopenharmony_ciSplit64BitOpPreRA::split64MulMad(Function *fn, Instruction *i, DataType hTy)
2601bf215546Sopenharmony_ci{
2602bf215546Sopenharmony_ci   assert(i->op == OP_MAD || i->op == OP_MUL);
2603bf215546Sopenharmony_ci   assert(!isFloatType(i->dType) && !isFloatType(i->sType));
2604bf215546Sopenharmony_ci   assert(typeSizeof(hTy) == 4);
2605bf215546Sopenharmony_ci
2606bf215546Sopenharmony_ci   bld.setPosition(i, true);
2607bf215546Sopenharmony_ci
2608bf215546Sopenharmony_ci   Value *zero = bld.mkImm(0u);
2609bf215546Sopenharmony_ci   Value *carry = bld.getSSA(1, FILE_FLAGS);
2610bf215546Sopenharmony_ci
2611bf215546Sopenharmony_ci   // We want to compute `d = a * b (+ c)?`, where a, b, c and d are 64-bit
2612bf215546Sopenharmony_ci   // values (a, b and c might be 32-bit values), using 32-bit operations. This
2613bf215546Sopenharmony_ci   // gives the following operations:
2614bf215546Sopenharmony_ci   // * `d.low = low(a.low * b.low) (+ c.low)?`
2615bf215546Sopenharmony_ci   // * `d.high = low(a.high * b.low) + low(a.low * b.high)
2616bf215546Sopenharmony_ci   //           + high(a.low * b.low) (+ c.high)?`
2617bf215546Sopenharmony_ci   //
2618bf215546Sopenharmony_ci   // To compute the high bits, we can split in the following operations:
2619bf215546Sopenharmony_ci   // * `tmp1   = low(a.high * b.low) (+ c.high)?`
2620bf215546Sopenharmony_ci   // * `tmp2   = low(a.low * b.high) + tmp1`
2621bf215546Sopenharmony_ci   // * `d.high = high(a.low * b.low) + tmp2`
2622bf215546Sopenharmony_ci   //
2623bf215546Sopenharmony_ci   // mkSplit put lower bits at index 0 and higher bits at index 1
2624bf215546Sopenharmony_ci
2625bf215546Sopenharmony_ci   Value *op1[2];
2626bf215546Sopenharmony_ci   if (i->getSrc(0)->reg.size == 8)
2627bf215546Sopenharmony_ci      bld.mkSplit(op1, 4, i->getSrc(0));
2628bf215546Sopenharmony_ci   else {
2629bf215546Sopenharmony_ci      op1[0] = i->getSrc(0);
2630bf215546Sopenharmony_ci      op1[1] = zero;
2631bf215546Sopenharmony_ci   }
2632bf215546Sopenharmony_ci   Value *op2[2];
2633bf215546Sopenharmony_ci   if (i->getSrc(1)->reg.size == 8)
2634bf215546Sopenharmony_ci      bld.mkSplit(op2, 4, i->getSrc(1));
2635bf215546Sopenharmony_ci   else {
2636bf215546Sopenharmony_ci      op2[0] = i->getSrc(1);
2637bf215546Sopenharmony_ci      op2[1] = zero;
2638bf215546Sopenharmony_ci   }
2639bf215546Sopenharmony_ci
2640bf215546Sopenharmony_ci   Value *op3[2] = { NULL, NULL };
2641bf215546Sopenharmony_ci   if (i->op == OP_MAD) {
2642bf215546Sopenharmony_ci      if (i->getSrc(2)->reg.size == 8)
2643bf215546Sopenharmony_ci         bld.mkSplit(op3, 4, i->getSrc(2));
2644bf215546Sopenharmony_ci      else {
2645bf215546Sopenharmony_ci         op3[0] = i->getSrc(2);
2646bf215546Sopenharmony_ci         op3[1] = zero;
2647bf215546Sopenharmony_ci      }
2648bf215546Sopenharmony_ci   }
2649bf215546Sopenharmony_ci
2650bf215546Sopenharmony_ci   Value *tmpRes1Hi = bld.getSSA();
2651bf215546Sopenharmony_ci   if (i->op == OP_MAD)
2652bf215546Sopenharmony_ci      bld.mkOp3(OP_MAD, hTy, tmpRes1Hi, op1[1], op2[0], op3[1]);
2653bf215546Sopenharmony_ci   else
2654bf215546Sopenharmony_ci      bld.mkOp2(OP_MUL, hTy, tmpRes1Hi, op1[1], op2[0]);
2655bf215546Sopenharmony_ci
2656bf215546Sopenharmony_ci   Value *tmpRes2Hi = bld.mkOp3v(OP_MAD, hTy, bld.getSSA(), op1[0], op2[1], tmpRes1Hi);
2657bf215546Sopenharmony_ci
2658bf215546Sopenharmony_ci   Value *def[2] = { bld.getSSA(), bld.getSSA() };
2659bf215546Sopenharmony_ci
2660bf215546Sopenharmony_ci   // If it was a MAD, add the carry from the low bits
2661bf215546Sopenharmony_ci   // It is not needed if it was a MUL, since we added high(a.low * b.low) to
2662bf215546Sopenharmony_ci   // d.high
2663bf215546Sopenharmony_ci   if (i->op == OP_MAD)
2664bf215546Sopenharmony_ci      bld.mkOp3(OP_MAD, hTy, def[0], op1[0], op2[0], op3[0])->setFlagsDef(1, carry);
2665bf215546Sopenharmony_ci   else
2666bf215546Sopenharmony_ci      bld.mkOp2(OP_MUL, hTy, def[0], op1[0], op2[0]);
2667bf215546Sopenharmony_ci
2668bf215546Sopenharmony_ci   Instruction *hiPart3 = bld.mkOp3(OP_MAD, hTy, def[1], op1[0], op2[0], tmpRes2Hi);
2669bf215546Sopenharmony_ci   hiPart3->subOp = NV50_IR_SUBOP_MUL_HIGH;
2670bf215546Sopenharmony_ci   if (i->op == OP_MAD)
2671bf215546Sopenharmony_ci      hiPart3->setFlagsSrc(3, carry);
2672bf215546Sopenharmony_ci
2673bf215546Sopenharmony_ci   bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]);
2674bf215546Sopenharmony_ci
2675bf215546Sopenharmony_ci   delete_Instruction(fn->getProgram(), i);
2676bf215546Sopenharmony_ci}
2677bf215546Sopenharmony_ci
2678bf215546Sopenharmony_ci// =============================================================================
2679bf215546Sopenharmony_ci
2680bf215546Sopenharmony_cistatic inline void
2681bf215546Sopenharmony_ciupdateLdStOffset(Instruction *ldst, int32_t offset, Function *fn)
2682bf215546Sopenharmony_ci{
2683bf215546Sopenharmony_ci   if (offset != ldst->getSrc(0)->reg.data.offset) {
2684bf215546Sopenharmony_ci      if (ldst->getSrc(0)->refCount() > 1)
2685bf215546Sopenharmony_ci         ldst->setSrc(0, cloneShallow(fn, ldst->getSrc(0)));
2686bf215546Sopenharmony_ci      ldst->getSrc(0)->reg.data.offset = offset;
2687bf215546Sopenharmony_ci   }
2688bf215546Sopenharmony_ci}
2689bf215546Sopenharmony_ci
2690bf215546Sopenharmony_ci// Combine loads and stores, forward stores to loads where possible.
2691bf215546Sopenharmony_ciclass MemoryOpt : public Pass
2692bf215546Sopenharmony_ci{
2693bf215546Sopenharmony_ciprivate:
2694bf215546Sopenharmony_ci   class Record
2695bf215546Sopenharmony_ci   {
2696bf215546Sopenharmony_ci   public:
2697bf215546Sopenharmony_ci      Record *next;
2698bf215546Sopenharmony_ci      Instruction *insn;
2699bf215546Sopenharmony_ci      const Value *rel[2];
2700bf215546Sopenharmony_ci      const Value *base;
2701bf215546Sopenharmony_ci      int32_t offset;
2702bf215546Sopenharmony_ci      int8_t fileIndex;
2703bf215546Sopenharmony_ci      uint8_t size;
2704bf215546Sopenharmony_ci      bool locked;
2705bf215546Sopenharmony_ci      Record *prev;
2706bf215546Sopenharmony_ci
2707bf215546Sopenharmony_ci      bool overlaps(const Instruction *ldst) const;
2708bf215546Sopenharmony_ci
2709bf215546Sopenharmony_ci      inline void link(Record **);
2710bf215546Sopenharmony_ci      inline void unlink(Record **);
2711bf215546Sopenharmony_ci      inline void set(const Instruction *ldst);
2712bf215546Sopenharmony_ci   };
2713bf215546Sopenharmony_ci
2714bf215546Sopenharmony_cipublic:
2715bf215546Sopenharmony_ci   MemoryOpt();
2716bf215546Sopenharmony_ci
2717bf215546Sopenharmony_ci   Record *loads[DATA_FILE_COUNT];
2718bf215546Sopenharmony_ci   Record *stores[DATA_FILE_COUNT];
2719bf215546Sopenharmony_ci
2720bf215546Sopenharmony_ci   MemoryPool recordPool;
2721bf215546Sopenharmony_ci
2722bf215546Sopenharmony_ciprivate:
2723bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
2724bf215546Sopenharmony_ci   bool runOpt(BasicBlock *);
2725bf215546Sopenharmony_ci
2726bf215546Sopenharmony_ci   Record **getList(const Instruction *);
2727bf215546Sopenharmony_ci
2728bf215546Sopenharmony_ci   Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const;
2729bf215546Sopenharmony_ci
2730bf215546Sopenharmony_ci   // merge @insn into load/store instruction from @rec
2731bf215546Sopenharmony_ci   bool combineLd(Record *rec, Instruction *ld);
2732bf215546Sopenharmony_ci   bool combineSt(Record *rec, Instruction *st);
2733bf215546Sopenharmony_ci
2734bf215546Sopenharmony_ci   bool replaceLdFromLd(Instruction *ld, Record *ldRec);
2735bf215546Sopenharmony_ci   bool replaceLdFromSt(Instruction *ld, Record *stRec);
2736bf215546Sopenharmony_ci   bool replaceStFromSt(Instruction *restrict st, Record *stRec);
2737bf215546Sopenharmony_ci
2738bf215546Sopenharmony_ci   void addRecord(Instruction *ldst);
2739bf215546Sopenharmony_ci   void purgeRecords(Instruction *const st, DataFile);
2740bf215546Sopenharmony_ci   void lockStores(Instruction *const ld);
2741bf215546Sopenharmony_ci   void reset();
2742bf215546Sopenharmony_ci
2743bf215546Sopenharmony_ciprivate:
2744bf215546Sopenharmony_ci   Record *prevRecord;
2745bf215546Sopenharmony_ci};
2746bf215546Sopenharmony_ci
2747bf215546Sopenharmony_ciMemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6)
2748bf215546Sopenharmony_ci{
2749bf215546Sopenharmony_ci   for (int i = 0; i < DATA_FILE_COUNT; ++i) {
2750bf215546Sopenharmony_ci      loads[i] = NULL;
2751bf215546Sopenharmony_ci      stores[i] = NULL;
2752bf215546Sopenharmony_ci   }
2753bf215546Sopenharmony_ci   prevRecord = NULL;
2754bf215546Sopenharmony_ci}
2755bf215546Sopenharmony_ci
2756bf215546Sopenharmony_civoid
2757bf215546Sopenharmony_ciMemoryOpt::reset()
2758bf215546Sopenharmony_ci{
2759bf215546Sopenharmony_ci   for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) {
2760bf215546Sopenharmony_ci      Record *it, *next;
2761bf215546Sopenharmony_ci      for (it = loads[i]; it; it = next) {
2762bf215546Sopenharmony_ci         next = it->next;
2763bf215546Sopenharmony_ci         recordPool.release(it);
2764bf215546Sopenharmony_ci      }
2765bf215546Sopenharmony_ci      loads[i] = NULL;
2766bf215546Sopenharmony_ci      for (it = stores[i]; it; it = next) {
2767bf215546Sopenharmony_ci         next = it->next;
2768bf215546Sopenharmony_ci         recordPool.release(it);
2769bf215546Sopenharmony_ci      }
2770bf215546Sopenharmony_ci      stores[i] = NULL;
2771bf215546Sopenharmony_ci   }
2772bf215546Sopenharmony_ci}
2773bf215546Sopenharmony_ci
2774bf215546Sopenharmony_cibool
2775bf215546Sopenharmony_ciMemoryOpt::combineLd(Record *rec, Instruction *ld)
2776bf215546Sopenharmony_ci{
2777bf215546Sopenharmony_ci   int32_t offRc = rec->offset;
2778bf215546Sopenharmony_ci   int32_t offLd = ld->getSrc(0)->reg.data.offset;
2779bf215546Sopenharmony_ci   int sizeRc = rec->size;
2780bf215546Sopenharmony_ci   int sizeLd = typeSizeof(ld->dType);
2781bf215546Sopenharmony_ci   int size = sizeRc + sizeLd;
2782bf215546Sopenharmony_ci   int d, j;
2783bf215546Sopenharmony_ci
2784bf215546Sopenharmony_ci   if (!prog->getTarget()->
2785bf215546Sopenharmony_ci       isAccessSupported(ld->getSrc(0)->reg.file, typeOfSize(size)))
2786bf215546Sopenharmony_ci      return false;
2787bf215546Sopenharmony_ci   // no unaligned loads
2788bf215546Sopenharmony_ci   if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) ||
2789bf215546Sopenharmony_ci       ((size == 0xc) && (MIN2(offLd, offRc) & 0xf)))
2790bf215546Sopenharmony_ci      return false;
2791bf215546Sopenharmony_ci   // for compute indirect loads are not guaranteed to be aligned
2792bf215546Sopenharmony_ci   if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0])
2793bf215546Sopenharmony_ci      return false;
2794bf215546Sopenharmony_ci
2795bf215546Sopenharmony_ci   assert(sizeRc + sizeLd <= 16 && offRc != offLd);
2796bf215546Sopenharmony_ci
2797bf215546Sopenharmony_ci   // lock any stores that overlap with the load being merged into the
2798bf215546Sopenharmony_ci   // existing record.
2799bf215546Sopenharmony_ci   lockStores(ld);
2800bf215546Sopenharmony_ci
2801bf215546Sopenharmony_ci   for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j);
2802bf215546Sopenharmony_ci
2803bf215546Sopenharmony_ci   if (offLd < offRc) {
2804bf215546Sopenharmony_ci      int sz;
2805bf215546Sopenharmony_ci      for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d);
2806bf215546Sopenharmony_ci      // d: nr of definitions in ld
2807bf215546Sopenharmony_ci      // j: nr of definitions in rec->insn, move:
2808bf215546Sopenharmony_ci      for (d = d + j - 1; j > 0; --j, --d)
2809bf215546Sopenharmony_ci         rec->insn->setDef(d, rec->insn->getDef(j - 1));
2810bf215546Sopenharmony_ci
2811bf215546Sopenharmony_ci      if (rec->insn->getSrc(0)->refCount() > 1)
2812bf215546Sopenharmony_ci         rec->insn->setSrc(0, cloneShallow(func, rec->insn->getSrc(0)));
2813bf215546Sopenharmony_ci      rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd;
2814bf215546Sopenharmony_ci
2815bf215546Sopenharmony_ci      d = 0;
2816bf215546Sopenharmony_ci   } else {
2817bf215546Sopenharmony_ci      d = j;
2818bf215546Sopenharmony_ci   }
2819bf215546Sopenharmony_ci   // move definitions of @ld to @rec->insn
2820bf215546Sopenharmony_ci   for (j = 0; sizeLd; ++j, ++d) {
2821bf215546Sopenharmony_ci      sizeLd -= ld->getDef(j)->reg.size;
2822bf215546Sopenharmony_ci      rec->insn->setDef(d, ld->getDef(j));
2823bf215546Sopenharmony_ci   }
2824bf215546Sopenharmony_ci
2825bf215546Sopenharmony_ci   rec->size = size;
2826bf215546Sopenharmony_ci   rec->insn->getSrc(0)->reg.size = size;
2827bf215546Sopenharmony_ci   rec->insn->setType(typeOfSize(size));
2828bf215546Sopenharmony_ci
2829bf215546Sopenharmony_ci   delete_Instruction(prog, ld);
2830bf215546Sopenharmony_ci
2831bf215546Sopenharmony_ci   return true;
2832bf215546Sopenharmony_ci}
2833bf215546Sopenharmony_ci
2834bf215546Sopenharmony_cibool
2835bf215546Sopenharmony_ciMemoryOpt::combineSt(Record *rec, Instruction *st)
2836bf215546Sopenharmony_ci{
2837bf215546Sopenharmony_ci   int32_t offRc = rec->offset;
2838bf215546Sopenharmony_ci   int32_t offSt = st->getSrc(0)->reg.data.offset;
2839bf215546Sopenharmony_ci   int sizeRc = rec->size;
2840bf215546Sopenharmony_ci   int sizeSt = typeSizeof(st->dType);
2841bf215546Sopenharmony_ci   int s = sizeSt / 4;
2842bf215546Sopenharmony_ci   int size = sizeRc + sizeSt;
2843bf215546Sopenharmony_ci   int j, k;
2844bf215546Sopenharmony_ci   Value *src[4]; // no modifiers in ValueRef allowed for st
2845bf215546Sopenharmony_ci   Value *extra[3];
2846bf215546Sopenharmony_ci
2847bf215546Sopenharmony_ci   if (!prog->getTarget()->
2848bf215546Sopenharmony_ci       isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size)))
2849bf215546Sopenharmony_ci      return false;
2850bf215546Sopenharmony_ci   // no unaligned stores
2851bf215546Sopenharmony_ci   if (size == 8 && MIN2(offRc, offSt) & 0x7)
2852bf215546Sopenharmony_ci      return false;
2853bf215546Sopenharmony_ci   // for compute indirect stores are not guaranteed to be aligned
2854bf215546Sopenharmony_ci   if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0])
2855bf215546Sopenharmony_ci      return false;
2856bf215546Sopenharmony_ci
2857bf215546Sopenharmony_ci   // There's really no great place to put this in a generic manner. Seemingly
2858bf215546Sopenharmony_ci   // wide stores at 0x60 don't work in GS shaders on SM50+. Don't combine
2859bf215546Sopenharmony_ci   // those.
2860bf215546Sopenharmony_ci   if (prog->getTarget()->getChipset() >= NVISA_GM107_CHIPSET &&
2861bf215546Sopenharmony_ci       prog->getType() == Program::TYPE_GEOMETRY &&
2862bf215546Sopenharmony_ci       st->getSrc(0)->reg.file == FILE_SHADER_OUTPUT &&
2863bf215546Sopenharmony_ci       rec->rel[0] == NULL &&
2864bf215546Sopenharmony_ci       MIN2(offRc, offSt) == 0x60)
2865bf215546Sopenharmony_ci      return false;
2866bf215546Sopenharmony_ci
2867bf215546Sopenharmony_ci   // remove any existing load/store records for the store being merged into
2868bf215546Sopenharmony_ci   // the existing record.
2869bf215546Sopenharmony_ci   purgeRecords(st, DATA_FILE_COUNT);
2870bf215546Sopenharmony_ci
2871bf215546Sopenharmony_ci   st->takeExtraSources(0, extra); // save predicate and indirect address
2872bf215546Sopenharmony_ci
2873bf215546Sopenharmony_ci   if (offRc < offSt) {
2874bf215546Sopenharmony_ci      // save values from @st
2875bf215546Sopenharmony_ci      for (s = 0; sizeSt; ++s) {
2876bf215546Sopenharmony_ci         sizeSt -= st->getSrc(s + 1)->reg.size;
2877bf215546Sopenharmony_ci         src[s] = st->getSrc(s + 1);
2878bf215546Sopenharmony_ci      }
2879bf215546Sopenharmony_ci      // set record's values as low sources of @st
2880bf215546Sopenharmony_ci      for (j = 1; sizeRc; ++j) {
2881bf215546Sopenharmony_ci         sizeRc -= rec->insn->getSrc(j)->reg.size;
2882bf215546Sopenharmony_ci         st->setSrc(j, rec->insn->getSrc(j));
2883bf215546Sopenharmony_ci      }
2884bf215546Sopenharmony_ci      // set saved values as high sources of @st
2885bf215546Sopenharmony_ci      for (k = j, j = 0; j < s; ++j)
2886bf215546Sopenharmony_ci         st->setSrc(k++, src[j]);
2887bf215546Sopenharmony_ci
2888bf215546Sopenharmony_ci      updateLdStOffset(st, offRc, func);
2889bf215546Sopenharmony_ci   } else {
2890bf215546Sopenharmony_ci      for (j = 1; sizeSt; ++j)
2891bf215546Sopenharmony_ci         sizeSt -= st->getSrc(j)->reg.size;
2892bf215546Sopenharmony_ci      for (s = 1; sizeRc; ++j, ++s) {
2893bf215546Sopenharmony_ci         sizeRc -= rec->insn->getSrc(s)->reg.size;
2894bf215546Sopenharmony_ci         st->setSrc(j, rec->insn->getSrc(s));
2895bf215546Sopenharmony_ci      }
2896bf215546Sopenharmony_ci      rec->offset = offSt;
2897bf215546Sopenharmony_ci   }
2898bf215546Sopenharmony_ci   st->putExtraSources(0, extra); // restore pointer and predicate
2899bf215546Sopenharmony_ci
2900bf215546Sopenharmony_ci   delete_Instruction(prog, rec->insn);
2901bf215546Sopenharmony_ci   rec->insn = st;
2902bf215546Sopenharmony_ci   rec->size = size;
2903bf215546Sopenharmony_ci   rec->insn->getSrc(0)->reg.size = size;
2904bf215546Sopenharmony_ci   rec->insn->setType(typeOfSize(size));
2905bf215546Sopenharmony_ci   return true;
2906bf215546Sopenharmony_ci}
2907bf215546Sopenharmony_ci
2908bf215546Sopenharmony_civoid
2909bf215546Sopenharmony_ciMemoryOpt::Record::set(const Instruction *ldst)
2910bf215546Sopenharmony_ci{
2911bf215546Sopenharmony_ci   const Symbol *mem = ldst->getSrc(0)->asSym();
2912bf215546Sopenharmony_ci   fileIndex = mem->reg.fileIndex;
2913bf215546Sopenharmony_ci   rel[0] = ldst->getIndirect(0, 0);
2914bf215546Sopenharmony_ci   rel[1] = ldst->getIndirect(0, 1);
2915bf215546Sopenharmony_ci   offset = mem->reg.data.offset;
2916bf215546Sopenharmony_ci   base = mem->getBase();
2917bf215546Sopenharmony_ci   size = typeSizeof(ldst->sType);
2918bf215546Sopenharmony_ci}
2919bf215546Sopenharmony_ci
2920bf215546Sopenharmony_civoid
2921bf215546Sopenharmony_ciMemoryOpt::Record::link(Record **list)
2922bf215546Sopenharmony_ci{
2923bf215546Sopenharmony_ci   next = *list;
2924bf215546Sopenharmony_ci   if (next)
2925bf215546Sopenharmony_ci      next->prev = this;
2926bf215546Sopenharmony_ci   prev = NULL;
2927bf215546Sopenharmony_ci   *list = this;
2928bf215546Sopenharmony_ci}
2929bf215546Sopenharmony_ci
2930bf215546Sopenharmony_civoid
2931bf215546Sopenharmony_ciMemoryOpt::Record::unlink(Record **list)
2932bf215546Sopenharmony_ci{
2933bf215546Sopenharmony_ci   if (next)
2934bf215546Sopenharmony_ci      next->prev = prev;
2935bf215546Sopenharmony_ci   if (prev)
2936bf215546Sopenharmony_ci      prev->next = next;
2937bf215546Sopenharmony_ci   else
2938bf215546Sopenharmony_ci      *list = next;
2939bf215546Sopenharmony_ci}
2940bf215546Sopenharmony_ci
2941bf215546Sopenharmony_ciMemoryOpt::Record **
2942bf215546Sopenharmony_ciMemoryOpt::getList(const Instruction *insn)
2943bf215546Sopenharmony_ci{
2944bf215546Sopenharmony_ci   if (insn->op == OP_LOAD || insn->op == OP_VFETCH)
2945bf215546Sopenharmony_ci      return &loads[insn->src(0).getFile()];
2946bf215546Sopenharmony_ci   return &stores[insn->src(0).getFile()];
2947bf215546Sopenharmony_ci}
2948bf215546Sopenharmony_ci
2949bf215546Sopenharmony_civoid
2950bf215546Sopenharmony_ciMemoryOpt::addRecord(Instruction *i)
2951bf215546Sopenharmony_ci{
2952bf215546Sopenharmony_ci   Record **list = getList(i);
2953bf215546Sopenharmony_ci   Record *it = reinterpret_cast<Record *>(recordPool.allocate());
2954bf215546Sopenharmony_ci
2955bf215546Sopenharmony_ci   it->link(list);
2956bf215546Sopenharmony_ci   it->set(i);
2957bf215546Sopenharmony_ci   it->insn = i;
2958bf215546Sopenharmony_ci   it->locked = false;
2959bf215546Sopenharmony_ci}
2960bf215546Sopenharmony_ci
2961bf215546Sopenharmony_ciMemoryOpt::Record *
2962bf215546Sopenharmony_ciMemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const
2963bf215546Sopenharmony_ci{
2964bf215546Sopenharmony_ci   const Symbol *sym = insn->getSrc(0)->asSym();
2965bf215546Sopenharmony_ci   const int size = typeSizeof(insn->sType);
2966bf215546Sopenharmony_ci   Record *rec = NULL;
2967bf215546Sopenharmony_ci   Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file];
2968bf215546Sopenharmony_ci
2969bf215546Sopenharmony_ci   for (; it; it = it->next) {
2970bf215546Sopenharmony_ci      if (it->locked && insn->op != OP_LOAD && insn->op != OP_VFETCH)
2971bf215546Sopenharmony_ci         continue;
2972bf215546Sopenharmony_ci      if ((it->offset >> 4) != (sym->reg.data.offset >> 4) ||
2973bf215546Sopenharmony_ci          it->rel[0] != insn->getIndirect(0, 0) ||
2974bf215546Sopenharmony_ci          it->fileIndex != sym->reg.fileIndex ||
2975bf215546Sopenharmony_ci          it->rel[1] != insn->getIndirect(0, 1))
2976bf215546Sopenharmony_ci         continue;
2977bf215546Sopenharmony_ci
2978bf215546Sopenharmony_ci      if (it->offset < sym->reg.data.offset) {
2979bf215546Sopenharmony_ci         if (it->offset + it->size >= sym->reg.data.offset) {
2980bf215546Sopenharmony_ci            isAdj = (it->offset + it->size == sym->reg.data.offset);
2981bf215546Sopenharmony_ci            if (!isAdj)
2982bf215546Sopenharmony_ci               return it;
2983bf215546Sopenharmony_ci            if (!(it->offset & 0x7))
2984bf215546Sopenharmony_ci               rec = it;
2985bf215546Sopenharmony_ci         }
2986bf215546Sopenharmony_ci      } else {
2987bf215546Sopenharmony_ci         isAdj = it->offset != sym->reg.data.offset;
2988bf215546Sopenharmony_ci         if (size <= it->size && !isAdj)
2989bf215546Sopenharmony_ci            return it;
2990bf215546Sopenharmony_ci         else
2991bf215546Sopenharmony_ci         if (!(sym->reg.data.offset & 0x7))
2992bf215546Sopenharmony_ci            if (it->offset - size <= sym->reg.data.offset)
2993bf215546Sopenharmony_ci               rec = it;
2994bf215546Sopenharmony_ci      }
2995bf215546Sopenharmony_ci   }
2996bf215546Sopenharmony_ci   return rec;
2997bf215546Sopenharmony_ci}
2998bf215546Sopenharmony_ci
2999bf215546Sopenharmony_cibool
3000bf215546Sopenharmony_ciMemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec)
3001bf215546Sopenharmony_ci{
3002bf215546Sopenharmony_ci   Instruction *st = rec->insn;
3003bf215546Sopenharmony_ci   int32_t offSt = rec->offset;
3004bf215546Sopenharmony_ci   int32_t offLd = ld->getSrc(0)->reg.data.offset;
3005bf215546Sopenharmony_ci   int d, s;
3006bf215546Sopenharmony_ci
3007bf215546Sopenharmony_ci   for (s = 1; offSt != offLd && st->srcExists(s); ++s)
3008bf215546Sopenharmony_ci      offSt += st->getSrc(s)->reg.size;
3009bf215546Sopenharmony_ci   if (offSt != offLd)
3010bf215546Sopenharmony_ci      return false;
3011bf215546Sopenharmony_ci
3012bf215546Sopenharmony_ci   for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) {
3013bf215546Sopenharmony_ci      if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size)
3014bf215546Sopenharmony_ci         return false;
3015bf215546Sopenharmony_ci      if (st->getSrc(s)->reg.file != FILE_GPR)
3016bf215546Sopenharmony_ci         return false;
3017bf215546Sopenharmony_ci      ld->def(d).replace(st->src(s), false);
3018bf215546Sopenharmony_ci   }
3019bf215546Sopenharmony_ci   ld->bb->remove(ld);
3020bf215546Sopenharmony_ci   return true;
3021bf215546Sopenharmony_ci}
3022bf215546Sopenharmony_ci
3023bf215546Sopenharmony_cibool
3024bf215546Sopenharmony_ciMemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec)
3025bf215546Sopenharmony_ci{
3026bf215546Sopenharmony_ci   Instruction *ldR = rec->insn;
3027bf215546Sopenharmony_ci   int32_t offR = rec->offset;
3028bf215546Sopenharmony_ci   int32_t offE = ldE->getSrc(0)->reg.data.offset;
3029bf215546Sopenharmony_ci   int dR, dE;
3030bf215546Sopenharmony_ci
3031bf215546Sopenharmony_ci   assert(offR <= offE);
3032bf215546Sopenharmony_ci   for (dR = 0; offR < offE && ldR->defExists(dR); ++dR)
3033bf215546Sopenharmony_ci      offR += ldR->getDef(dR)->reg.size;
3034bf215546Sopenharmony_ci   if (offR != offE)
3035bf215546Sopenharmony_ci      return false;
3036bf215546Sopenharmony_ci
3037bf215546Sopenharmony_ci   for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) {
3038bf215546Sopenharmony_ci      if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size)
3039bf215546Sopenharmony_ci         return false;
3040bf215546Sopenharmony_ci      ldE->def(dE).replace(ldR->getDef(dR), false);
3041bf215546Sopenharmony_ci   }
3042bf215546Sopenharmony_ci
3043bf215546Sopenharmony_ci   delete_Instruction(prog, ldE);
3044bf215546Sopenharmony_ci   return true;
3045bf215546Sopenharmony_ci}
3046bf215546Sopenharmony_ci
3047bf215546Sopenharmony_cibool
3048bf215546Sopenharmony_ciMemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec)
3049bf215546Sopenharmony_ci{
3050bf215546Sopenharmony_ci   const Instruction *const ri = rec->insn;
3051bf215546Sopenharmony_ci   Value *extra[3];
3052bf215546Sopenharmony_ci
3053bf215546Sopenharmony_ci   int32_t offS = st->getSrc(0)->reg.data.offset;
3054bf215546Sopenharmony_ci   int32_t offR = rec->offset;
3055bf215546Sopenharmony_ci   int32_t endS = offS + typeSizeof(st->dType);
3056bf215546Sopenharmony_ci   int32_t endR = offR + typeSizeof(ri->dType);
3057bf215546Sopenharmony_ci
3058bf215546Sopenharmony_ci   rec->size = MAX2(endS, endR) - MIN2(offS, offR);
3059bf215546Sopenharmony_ci
3060bf215546Sopenharmony_ci   st->takeExtraSources(0, extra);
3061bf215546Sopenharmony_ci
3062bf215546Sopenharmony_ci   if (offR < offS) {
3063bf215546Sopenharmony_ci      Value *vals[10];
3064bf215546Sopenharmony_ci      int s, n;
3065bf215546Sopenharmony_ci      int k = 0;
3066bf215546Sopenharmony_ci      // get non-replaced sources of ri
3067bf215546Sopenharmony_ci      for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s)
3068bf215546Sopenharmony_ci         vals[k++] = ri->getSrc(s);
3069bf215546Sopenharmony_ci      n = s;
3070bf215546Sopenharmony_ci      // get replaced sources of st
3071bf215546Sopenharmony_ci      for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s)
3072bf215546Sopenharmony_ci         vals[k++] = st->getSrc(s);
3073bf215546Sopenharmony_ci      // skip replaced sources of ri
3074bf215546Sopenharmony_ci      for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s);
3075bf215546Sopenharmony_ci      // get non-replaced sources after values covered by st
3076bf215546Sopenharmony_ci      for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s)
3077bf215546Sopenharmony_ci         vals[k++] = ri->getSrc(s);
3078bf215546Sopenharmony_ci      assert((unsigned int)k <= ARRAY_SIZE(vals));
3079bf215546Sopenharmony_ci      for (s = 0; s < k; ++s)
3080bf215546Sopenharmony_ci         st->setSrc(s + 1, vals[s]);
3081bf215546Sopenharmony_ci      st->setSrc(0, ri->getSrc(0));
3082bf215546Sopenharmony_ci   } else
3083bf215546Sopenharmony_ci   if (endR > endS) {
3084bf215546Sopenharmony_ci      int j, s;
3085bf215546Sopenharmony_ci      for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size);
3086bf215546Sopenharmony_ci      for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size);
3087bf215546Sopenharmony_ci      for (; offR < endR; offR += ri->getSrc(j++)->reg.size)
3088bf215546Sopenharmony_ci         st->setSrc(s++, ri->getSrc(j));
3089bf215546Sopenharmony_ci   }
3090bf215546Sopenharmony_ci   st->putExtraSources(0, extra);
3091bf215546Sopenharmony_ci
3092bf215546Sopenharmony_ci   delete_Instruction(prog, rec->insn);
3093bf215546Sopenharmony_ci
3094bf215546Sopenharmony_ci   rec->insn = st;
3095bf215546Sopenharmony_ci   rec->offset = st->getSrc(0)->reg.data.offset;
3096bf215546Sopenharmony_ci
3097bf215546Sopenharmony_ci   st->setType(typeOfSize(rec->size));
3098bf215546Sopenharmony_ci
3099bf215546Sopenharmony_ci   return true;
3100bf215546Sopenharmony_ci}
3101bf215546Sopenharmony_ci
3102bf215546Sopenharmony_cibool
3103bf215546Sopenharmony_ciMemoryOpt::Record::overlaps(const Instruction *ldst) const
3104bf215546Sopenharmony_ci{
3105bf215546Sopenharmony_ci   Record that;
3106bf215546Sopenharmony_ci   that.set(ldst);
3107bf215546Sopenharmony_ci
3108bf215546Sopenharmony_ci   // This assumes that images/buffers can't overlap. They can.
3109bf215546Sopenharmony_ci   // TODO: Plumb the restrict logic through, and only skip when it's a
3110bf215546Sopenharmony_ci   // restrict situation, or there can implicitly be no writes.
3111bf215546Sopenharmony_ci   if (this->fileIndex != that.fileIndex && this->rel[1] == that.rel[1])
3112bf215546Sopenharmony_ci      return false;
3113bf215546Sopenharmony_ci
3114bf215546Sopenharmony_ci   if (this->rel[0] || that.rel[0])
3115bf215546Sopenharmony_ci      return this->base == that.base;
3116bf215546Sopenharmony_ci
3117bf215546Sopenharmony_ci   return
3118bf215546Sopenharmony_ci      (this->offset < that.offset + that.size) &&
3119bf215546Sopenharmony_ci      (this->offset + this->size > that.offset);
3120bf215546Sopenharmony_ci}
3121bf215546Sopenharmony_ci
3122bf215546Sopenharmony_ci// We must not eliminate stores that affect the result of @ld if
3123bf215546Sopenharmony_ci// we find later stores to the same location, and we may no longer
3124bf215546Sopenharmony_ci// merge them with later stores.
3125bf215546Sopenharmony_ci// The stored value can, however, still be used to determine the value
3126bf215546Sopenharmony_ci// returned by future loads.
3127bf215546Sopenharmony_civoid
3128bf215546Sopenharmony_ciMemoryOpt::lockStores(Instruction *const ld)
3129bf215546Sopenharmony_ci{
3130bf215546Sopenharmony_ci   for (Record *r = stores[ld->src(0).getFile()]; r; r = r->next)
3131bf215546Sopenharmony_ci      if (!r->locked && r->overlaps(ld))
3132bf215546Sopenharmony_ci         r->locked = true;
3133bf215546Sopenharmony_ci}
3134bf215546Sopenharmony_ci
3135bf215546Sopenharmony_ci// Prior loads from the location of @st are no longer valid.
3136bf215546Sopenharmony_ci// Stores to the location of @st may no longer be used to derive
3137bf215546Sopenharmony_ci// the value at it nor be coalesced into later stores.
3138bf215546Sopenharmony_civoid
3139bf215546Sopenharmony_ciMemoryOpt::purgeRecords(Instruction *const st, DataFile f)
3140bf215546Sopenharmony_ci{
3141bf215546Sopenharmony_ci   if (st)
3142bf215546Sopenharmony_ci      f = st->src(0).getFile();
3143bf215546Sopenharmony_ci
3144bf215546Sopenharmony_ci   for (Record *r = loads[f]; r; r = r->next)
3145bf215546Sopenharmony_ci      if (!st || r->overlaps(st))
3146bf215546Sopenharmony_ci         r->unlink(&loads[f]);
3147bf215546Sopenharmony_ci
3148bf215546Sopenharmony_ci   for (Record *r = stores[f]; r; r = r->next)
3149bf215546Sopenharmony_ci      if (!st || r->overlaps(st))
3150bf215546Sopenharmony_ci         r->unlink(&stores[f]);
3151bf215546Sopenharmony_ci}
3152bf215546Sopenharmony_ci
3153bf215546Sopenharmony_cibool
3154bf215546Sopenharmony_ciMemoryOpt::visit(BasicBlock *bb)
3155bf215546Sopenharmony_ci{
3156bf215546Sopenharmony_ci   bool ret = runOpt(bb);
3157bf215546Sopenharmony_ci   // Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st
3158bf215546Sopenharmony_ci   // where 96 bit memory operations are forbidden.
3159bf215546Sopenharmony_ci   if (ret)
3160bf215546Sopenharmony_ci      ret = runOpt(bb);
3161bf215546Sopenharmony_ci   return ret;
3162bf215546Sopenharmony_ci}
3163bf215546Sopenharmony_ci
3164bf215546Sopenharmony_cibool
3165bf215546Sopenharmony_ciMemoryOpt::runOpt(BasicBlock *bb)
3166bf215546Sopenharmony_ci{
3167bf215546Sopenharmony_ci   Instruction *ldst, *next;
3168bf215546Sopenharmony_ci   Record *rec;
3169bf215546Sopenharmony_ci   bool isAdjacent = true;
3170bf215546Sopenharmony_ci
3171bf215546Sopenharmony_ci   for (ldst = bb->getEntry(); ldst; ldst = next) {
3172bf215546Sopenharmony_ci      bool keep = true;
3173bf215546Sopenharmony_ci      bool isLoad = true;
3174bf215546Sopenharmony_ci      next = ldst->next;
3175bf215546Sopenharmony_ci
3176bf215546Sopenharmony_ci      if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) {
3177bf215546Sopenharmony_ci         if (ldst->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
3178bf215546Sopenharmony_ci            purgeRecords(ldst, ldst->src(0).getFile());
3179bf215546Sopenharmony_ci            continue;
3180bf215546Sopenharmony_ci         }
3181bf215546Sopenharmony_ci         if (ldst->isDead()) {
3182bf215546Sopenharmony_ci            // might have been produced by earlier optimization
3183bf215546Sopenharmony_ci            delete_Instruction(prog, ldst);
3184bf215546Sopenharmony_ci            continue;
3185bf215546Sopenharmony_ci         }
3186bf215546Sopenharmony_ci      } else
3187bf215546Sopenharmony_ci      if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) {
3188bf215546Sopenharmony_ci         if (ldst->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) {
3189bf215546Sopenharmony_ci            purgeRecords(ldst, ldst->src(0).getFile());
3190bf215546Sopenharmony_ci            continue;
3191bf215546Sopenharmony_ci         }
3192bf215546Sopenharmony_ci         if (typeSizeof(ldst->dType) == 4 &&
3193bf215546Sopenharmony_ci             ldst->src(1).getFile() == FILE_GPR &&
3194bf215546Sopenharmony_ci             ldst->getSrc(1)->getInsn()->op == OP_NOP) {
3195bf215546Sopenharmony_ci            delete_Instruction(prog, ldst);
3196bf215546Sopenharmony_ci            continue;
3197bf215546Sopenharmony_ci         }
3198bf215546Sopenharmony_ci         isLoad = false;
3199bf215546Sopenharmony_ci      } else {
3200bf215546Sopenharmony_ci         // TODO: maybe have all fixed ops act as barrier ?
3201bf215546Sopenharmony_ci         if (ldst->op == OP_CALL ||
3202bf215546Sopenharmony_ci             ldst->op == OP_BAR ||
3203bf215546Sopenharmony_ci             ldst->op == OP_MEMBAR) {
3204bf215546Sopenharmony_ci            purgeRecords(NULL, FILE_MEMORY_LOCAL);
3205bf215546Sopenharmony_ci            purgeRecords(NULL, FILE_MEMORY_GLOBAL);
3206bf215546Sopenharmony_ci            purgeRecords(NULL, FILE_MEMORY_SHARED);
3207bf215546Sopenharmony_ci            purgeRecords(NULL, FILE_SHADER_OUTPUT);
3208bf215546Sopenharmony_ci         } else
3209bf215546Sopenharmony_ci         if (ldst->op == OP_ATOM || ldst->op == OP_CCTL) {
3210bf215546Sopenharmony_ci            if (ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) {
3211bf215546Sopenharmony_ci               purgeRecords(NULL, FILE_MEMORY_LOCAL);
3212bf215546Sopenharmony_ci               purgeRecords(NULL, FILE_MEMORY_GLOBAL);
3213bf215546Sopenharmony_ci               purgeRecords(NULL, FILE_MEMORY_SHARED);
3214bf215546Sopenharmony_ci            } else {
3215bf215546Sopenharmony_ci               purgeRecords(NULL, ldst->src(0).getFile());
3216bf215546Sopenharmony_ci            }
3217bf215546Sopenharmony_ci         } else
3218bf215546Sopenharmony_ci         if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) {
3219bf215546Sopenharmony_ci            purgeRecords(NULL, FILE_SHADER_OUTPUT);
3220bf215546Sopenharmony_ci         }
3221bf215546Sopenharmony_ci         continue;
3222bf215546Sopenharmony_ci      }
3223bf215546Sopenharmony_ci      if (ldst->getPredicate()) // TODO: handle predicated ld/st
3224bf215546Sopenharmony_ci         continue;
3225bf215546Sopenharmony_ci      if (ldst->perPatch) // TODO: create separate per-patch lists
3226bf215546Sopenharmony_ci         continue;
3227bf215546Sopenharmony_ci
3228bf215546Sopenharmony_ci      if (isLoad) {
3229bf215546Sopenharmony_ci         DataFile file = ldst->src(0).getFile();
3230bf215546Sopenharmony_ci
3231bf215546Sopenharmony_ci         // if ld l[]/g[] look for previous store to eliminate the reload
3232bf215546Sopenharmony_ci         if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) {
3233bf215546Sopenharmony_ci            // TODO: shared memory ?
3234bf215546Sopenharmony_ci            rec = findRecord(ldst, false, isAdjacent);
3235bf215546Sopenharmony_ci            if (rec && !isAdjacent)
3236bf215546Sopenharmony_ci               keep = !replaceLdFromSt(ldst, rec);
3237bf215546Sopenharmony_ci         }
3238bf215546Sopenharmony_ci
3239bf215546Sopenharmony_ci         // or look for ld from the same location and replace this one
3240bf215546Sopenharmony_ci         rec = keep ? findRecord(ldst, true, isAdjacent) : NULL;
3241bf215546Sopenharmony_ci         if (rec) {
3242bf215546Sopenharmony_ci            if (!isAdjacent)
3243bf215546Sopenharmony_ci               keep = !replaceLdFromLd(ldst, rec);
3244bf215546Sopenharmony_ci            else
3245bf215546Sopenharmony_ci               // or combine a previous load with this one
3246bf215546Sopenharmony_ci               keep = !combineLd(rec, ldst);
3247bf215546Sopenharmony_ci         }
3248bf215546Sopenharmony_ci         if (keep)
3249bf215546Sopenharmony_ci            lockStores(ldst);
3250bf215546Sopenharmony_ci      } else {
3251bf215546Sopenharmony_ci         rec = findRecord(ldst, false, isAdjacent);
3252bf215546Sopenharmony_ci         if (rec) {
3253bf215546Sopenharmony_ci            if (!isAdjacent)
3254bf215546Sopenharmony_ci               keep = !replaceStFromSt(ldst, rec);
3255bf215546Sopenharmony_ci            else
3256bf215546Sopenharmony_ci               keep = !combineSt(rec, ldst);
3257bf215546Sopenharmony_ci         }
3258bf215546Sopenharmony_ci         if (keep)
3259bf215546Sopenharmony_ci            purgeRecords(ldst, DATA_FILE_COUNT);
3260bf215546Sopenharmony_ci      }
3261bf215546Sopenharmony_ci      if (keep)
3262bf215546Sopenharmony_ci         addRecord(ldst);
3263bf215546Sopenharmony_ci   }
3264bf215546Sopenharmony_ci   reset();
3265bf215546Sopenharmony_ci
3266bf215546Sopenharmony_ci   return true;
3267bf215546Sopenharmony_ci}
3268bf215546Sopenharmony_ci
3269bf215546Sopenharmony_ci// =============================================================================
3270bf215546Sopenharmony_ci
3271bf215546Sopenharmony_ci// Turn control flow into predicated instructions (after register allocation !).
3272bf215546Sopenharmony_ci// TODO:
3273bf215546Sopenharmony_ci// Could move this to before register allocation on NVC0 and also handle nested
3274bf215546Sopenharmony_ci// constructs.
3275bf215546Sopenharmony_ciclass FlatteningPass : public Pass
3276bf215546Sopenharmony_ci{
3277bf215546Sopenharmony_cipublic:
3278bf215546Sopenharmony_ci   FlatteningPass() : gpr_unit(0) {}
3279bf215546Sopenharmony_ci
3280bf215546Sopenharmony_ciprivate:
3281bf215546Sopenharmony_ci   virtual bool visit(Function *);
3282bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
3283bf215546Sopenharmony_ci
3284bf215546Sopenharmony_ci   bool tryPredicateConditional(BasicBlock *);
3285bf215546Sopenharmony_ci   void predicateInstructions(BasicBlock *, Value *pred, CondCode cc);
3286bf215546Sopenharmony_ci   void tryPropagateBranch(BasicBlock *);
3287bf215546Sopenharmony_ci   inline bool isConstantCondition(Value *pred);
3288bf215546Sopenharmony_ci   inline bool mayPredicate(const Instruction *, const Value *pred) const;
3289bf215546Sopenharmony_ci   inline void removeFlow(Instruction *);
3290bf215546Sopenharmony_ci
3291bf215546Sopenharmony_ci   uint8_t gpr_unit;
3292bf215546Sopenharmony_ci};
3293bf215546Sopenharmony_ci
3294bf215546Sopenharmony_cibool
3295bf215546Sopenharmony_ciFlatteningPass::isConstantCondition(Value *pred)
3296bf215546Sopenharmony_ci{
3297bf215546Sopenharmony_ci   Instruction *insn = pred->getUniqueInsn();
3298bf215546Sopenharmony_ci   assert(insn);
3299bf215546Sopenharmony_ci   if (insn->op != OP_SET || insn->srcExists(2))
3300bf215546Sopenharmony_ci      return false;
3301bf215546Sopenharmony_ci
3302bf215546Sopenharmony_ci   for (int s = 0; s < 2 && insn->srcExists(s); ++s) {
3303bf215546Sopenharmony_ci      Instruction *ld = insn->getSrc(s)->getUniqueInsn();
3304bf215546Sopenharmony_ci      DataFile file;
3305bf215546Sopenharmony_ci      if (ld) {
3306bf215546Sopenharmony_ci         if (ld->op != OP_MOV && ld->op != OP_LOAD)
3307bf215546Sopenharmony_ci            return false;
3308bf215546Sopenharmony_ci         if (ld->src(0).isIndirect(0))
3309bf215546Sopenharmony_ci            return false;
3310bf215546Sopenharmony_ci         file = ld->src(0).getFile();
3311bf215546Sopenharmony_ci      } else {
3312bf215546Sopenharmony_ci         file = insn->src(s).getFile();
3313bf215546Sopenharmony_ci         // catch $r63 on NVC0 and $r63/$r127 on NV50. Unfortunately maxGPR is
3314bf215546Sopenharmony_ci         // in register "units", which can vary between targets.
3315bf215546Sopenharmony_ci         if (file == FILE_GPR) {
3316bf215546Sopenharmony_ci            Value *v = insn->getSrc(s);
3317bf215546Sopenharmony_ci            int bytes = v->reg.data.id * MIN2(v->reg.size, 4);
3318bf215546Sopenharmony_ci            int units = bytes >> gpr_unit;
3319bf215546Sopenharmony_ci            if (units > prog->maxGPR)
3320bf215546Sopenharmony_ci               file = FILE_IMMEDIATE;
3321bf215546Sopenharmony_ci         }
3322bf215546Sopenharmony_ci      }
3323bf215546Sopenharmony_ci      if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST)
3324bf215546Sopenharmony_ci         return false;
3325bf215546Sopenharmony_ci   }
3326bf215546Sopenharmony_ci   return true;
3327bf215546Sopenharmony_ci}
3328bf215546Sopenharmony_ci
3329bf215546Sopenharmony_civoid
3330bf215546Sopenharmony_ciFlatteningPass::removeFlow(Instruction *insn)
3331bf215546Sopenharmony_ci{
3332bf215546Sopenharmony_ci   FlowInstruction *term = insn ? insn->asFlow() : NULL;
3333bf215546Sopenharmony_ci   if (!term)
3334bf215546Sopenharmony_ci      return;
3335bf215546Sopenharmony_ci   Graph::Edge::Type ty = term->bb->cfg.outgoing().getType();
3336bf215546Sopenharmony_ci
3337bf215546Sopenharmony_ci   if (term->op == OP_BRA) {
3338bf215546Sopenharmony_ci      // TODO: this might get more difficult when we get arbitrary BRAs
3339bf215546Sopenharmony_ci      if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK)
3340bf215546Sopenharmony_ci         return;
3341bf215546Sopenharmony_ci   } else
3342bf215546Sopenharmony_ci   if (term->op != OP_JOIN)
3343bf215546Sopenharmony_ci      return;
3344bf215546Sopenharmony_ci
3345bf215546Sopenharmony_ci   Value *pred = term->getPredicate();
3346bf215546Sopenharmony_ci
3347bf215546Sopenharmony_ci   delete_Instruction(prog, term);
3348bf215546Sopenharmony_ci
3349bf215546Sopenharmony_ci   if (pred && pred->refCount() == 0) {
3350bf215546Sopenharmony_ci      Instruction *pSet = pred->getUniqueInsn();
3351bf215546Sopenharmony_ci      pred->join->reg.data.id = -1; // deallocate
3352bf215546Sopenharmony_ci      if (pSet->isDead())
3353bf215546Sopenharmony_ci         delete_Instruction(prog, pSet);
3354bf215546Sopenharmony_ci   }
3355bf215546Sopenharmony_ci}
3356bf215546Sopenharmony_ci
3357bf215546Sopenharmony_civoid
3358bf215546Sopenharmony_ciFlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc)
3359bf215546Sopenharmony_ci{
3360bf215546Sopenharmony_ci   for (Instruction *i = bb->getEntry(); i; i = i->next) {
3361bf215546Sopenharmony_ci      if (i->isNop())
3362bf215546Sopenharmony_ci         continue;
3363bf215546Sopenharmony_ci      assert(!i->getPredicate());
3364bf215546Sopenharmony_ci      i->setPredicate(cc, pred);
3365bf215546Sopenharmony_ci   }
3366bf215546Sopenharmony_ci   removeFlow(bb->getExit());
3367bf215546Sopenharmony_ci}
3368bf215546Sopenharmony_ci
3369bf215546Sopenharmony_cibool
3370bf215546Sopenharmony_ciFlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const
3371bf215546Sopenharmony_ci{
3372bf215546Sopenharmony_ci   if (insn->isPseudo())
3373bf215546Sopenharmony_ci      return true;
3374bf215546Sopenharmony_ci   // TODO: calls where we don't know which registers are modified
3375bf215546Sopenharmony_ci
3376bf215546Sopenharmony_ci   if (!prog->getTarget()->mayPredicate(insn, pred))
3377bf215546Sopenharmony_ci      return false;
3378bf215546Sopenharmony_ci   for (int d = 0; insn->defExists(d); ++d)
3379bf215546Sopenharmony_ci      if (insn->getDef(d)->equals(pred))
3380bf215546Sopenharmony_ci         return false;
3381bf215546Sopenharmony_ci   return true;
3382bf215546Sopenharmony_ci}
3383bf215546Sopenharmony_ci
3384bf215546Sopenharmony_ci// If we jump to BRA/RET/EXIT, replace the jump with it.
3385bf215546Sopenharmony_ci// NOTE: We do not update the CFG anymore here !
3386bf215546Sopenharmony_ci//
3387bf215546Sopenharmony_ci// TODO: Handle cases where we skip over a branch (maybe do that elsewhere ?):
3388bf215546Sopenharmony_ci//  BB:0
3389bf215546Sopenharmony_ci//   @p0 bra BB:2 -> @!p0 bra BB:3 iff (!) BB:2 immediately adjoins BB:1
3390bf215546Sopenharmony_ci//  BB1:
3391bf215546Sopenharmony_ci//   bra BB:3
3392bf215546Sopenharmony_ci//  BB2:
3393bf215546Sopenharmony_ci//   ...
3394bf215546Sopenharmony_ci//  BB3:
3395bf215546Sopenharmony_ci//   ...
3396bf215546Sopenharmony_civoid
3397bf215546Sopenharmony_ciFlatteningPass::tryPropagateBranch(BasicBlock *bb)
3398bf215546Sopenharmony_ci{
3399bf215546Sopenharmony_ci   for (Instruction *i = bb->getExit(); i && i->op == OP_BRA; i = i->prev) {
3400bf215546Sopenharmony_ci      BasicBlock *bf = i->asFlow()->target.bb;
3401bf215546Sopenharmony_ci
3402bf215546Sopenharmony_ci      if (bf->getInsnCount() != 1)
3403bf215546Sopenharmony_ci         continue;
3404bf215546Sopenharmony_ci
3405bf215546Sopenharmony_ci      FlowInstruction *bra = i->asFlow();
3406bf215546Sopenharmony_ci      FlowInstruction *rep = bf->getExit()->asFlow();
3407bf215546Sopenharmony_ci
3408bf215546Sopenharmony_ci      if (!rep || rep->getPredicate())
3409bf215546Sopenharmony_ci         continue;
3410bf215546Sopenharmony_ci      if (rep->op != OP_BRA &&
3411bf215546Sopenharmony_ci          rep->op != OP_JOIN &&
3412bf215546Sopenharmony_ci          rep->op != OP_EXIT)
3413bf215546Sopenharmony_ci         continue;
3414bf215546Sopenharmony_ci
3415bf215546Sopenharmony_ci      // TODO: If there are multiple branches to @rep, only the first would
3416bf215546Sopenharmony_ci      // be replaced, so only remove them after this pass is done ?
3417bf215546Sopenharmony_ci      // Also, need to check all incident blocks for fall-through exits and
3418bf215546Sopenharmony_ci      // add the branch there.
3419bf215546Sopenharmony_ci      bra->op = rep->op;
3420bf215546Sopenharmony_ci      bra->target.bb = rep->target.bb;
3421bf215546Sopenharmony_ci      if (bf->cfg.incidentCount() == 1)
3422bf215546Sopenharmony_ci         bf->remove(rep);
3423bf215546Sopenharmony_ci   }
3424bf215546Sopenharmony_ci}
3425bf215546Sopenharmony_ci
3426bf215546Sopenharmony_cibool
3427bf215546Sopenharmony_ciFlatteningPass::visit(Function *fn)
3428bf215546Sopenharmony_ci{
3429bf215546Sopenharmony_ci   gpr_unit = prog->getTarget()->getFileUnit(FILE_GPR);
3430bf215546Sopenharmony_ci
3431bf215546Sopenharmony_ci   return true;
3432bf215546Sopenharmony_ci}
3433bf215546Sopenharmony_ci
3434bf215546Sopenharmony_cibool
3435bf215546Sopenharmony_ciFlatteningPass::visit(BasicBlock *bb)
3436bf215546Sopenharmony_ci{
3437bf215546Sopenharmony_ci   if (tryPredicateConditional(bb))
3438bf215546Sopenharmony_ci      return true;
3439bf215546Sopenharmony_ci
3440bf215546Sopenharmony_ci   // try to attach join to previous instruction
3441bf215546Sopenharmony_ci   if (prog->getTarget()->hasJoin) {
3442bf215546Sopenharmony_ci      Instruction *insn = bb->getExit();
3443bf215546Sopenharmony_ci      if (insn && insn->op == OP_JOIN && !insn->getPredicate()) {
3444bf215546Sopenharmony_ci         insn = insn->prev;
3445bf215546Sopenharmony_ci         if (insn && !insn->getPredicate() &&
3446bf215546Sopenharmony_ci             !insn->asFlow() &&
3447bf215546Sopenharmony_ci             insn->op != OP_DISCARD &&
3448bf215546Sopenharmony_ci             insn->op != OP_TEXBAR &&
3449bf215546Sopenharmony_ci             !isTextureOp(insn->op) && // probably just nve4
3450bf215546Sopenharmony_ci             !isSurfaceOp(insn->op) && // not confirmed
3451bf215546Sopenharmony_ci             insn->op != OP_LINTERP && // probably just nve4
3452bf215546Sopenharmony_ci             insn->op != OP_PINTERP && // probably just nve4
3453bf215546Sopenharmony_ci             ((insn->op != OP_LOAD && insn->op != OP_STORE && insn->op != OP_ATOM) ||
3454bf215546Sopenharmony_ci              (typeSizeof(insn->dType) <= 4 && !insn->src(0).isIndirect(0))) &&
3455bf215546Sopenharmony_ci             !insn->isNop()) {
3456bf215546Sopenharmony_ci            insn->join = 1;
3457bf215546Sopenharmony_ci            bb->remove(bb->getExit());
3458bf215546Sopenharmony_ci            return true;
3459bf215546Sopenharmony_ci         }
3460bf215546Sopenharmony_ci      }
3461bf215546Sopenharmony_ci   }
3462bf215546Sopenharmony_ci
3463bf215546Sopenharmony_ci   tryPropagateBranch(bb);
3464bf215546Sopenharmony_ci
3465bf215546Sopenharmony_ci   return true;
3466bf215546Sopenharmony_ci}
3467bf215546Sopenharmony_ci
3468bf215546Sopenharmony_cibool
3469bf215546Sopenharmony_ciFlatteningPass::tryPredicateConditional(BasicBlock *bb)
3470bf215546Sopenharmony_ci{
3471bf215546Sopenharmony_ci   BasicBlock *bL = NULL, *bR = NULL;
3472bf215546Sopenharmony_ci   unsigned int nL = 0, nR = 0, limit = 12;
3473bf215546Sopenharmony_ci   Instruction *insn;
3474bf215546Sopenharmony_ci   unsigned int mask;
3475bf215546Sopenharmony_ci
3476bf215546Sopenharmony_ci   mask = bb->initiatesSimpleConditional();
3477bf215546Sopenharmony_ci   if (!mask)
3478bf215546Sopenharmony_ci      return false;
3479bf215546Sopenharmony_ci
3480bf215546Sopenharmony_ci   assert(bb->getExit());
3481bf215546Sopenharmony_ci   Value *pred = bb->getExit()->getPredicate();
3482bf215546Sopenharmony_ci   assert(pred);
3483bf215546Sopenharmony_ci
3484bf215546Sopenharmony_ci   if (isConstantCondition(pred))
3485bf215546Sopenharmony_ci      limit = 4;
3486bf215546Sopenharmony_ci
3487bf215546Sopenharmony_ci   Graph::EdgeIterator ei = bb->cfg.outgoing();
3488bf215546Sopenharmony_ci
3489bf215546Sopenharmony_ci   if (mask & 1) {
3490bf215546Sopenharmony_ci      bL = BasicBlock::get(ei.getNode());
3491bf215546Sopenharmony_ci      for (insn = bL->getEntry(); insn; insn = insn->next, ++nL)
3492bf215546Sopenharmony_ci         if (!mayPredicate(insn, pred))
3493bf215546Sopenharmony_ci            return false;
3494bf215546Sopenharmony_ci      if (nL > limit)
3495bf215546Sopenharmony_ci         return false; // too long, do a real branch
3496bf215546Sopenharmony_ci   }
3497bf215546Sopenharmony_ci   ei.next();
3498bf215546Sopenharmony_ci
3499bf215546Sopenharmony_ci   if (mask & 2) {
3500bf215546Sopenharmony_ci      bR = BasicBlock::get(ei.getNode());
3501bf215546Sopenharmony_ci      for (insn = bR->getEntry(); insn; insn = insn->next, ++nR)
3502bf215546Sopenharmony_ci         if (!mayPredicate(insn, pred))
3503bf215546Sopenharmony_ci            return false;
3504bf215546Sopenharmony_ci      if (nR > limit)
3505bf215546Sopenharmony_ci         return false; // too long, do a real branch
3506bf215546Sopenharmony_ci   }
3507bf215546Sopenharmony_ci
3508bf215546Sopenharmony_ci   if (bL)
3509bf215546Sopenharmony_ci      predicateInstructions(bL, pred, bb->getExit()->cc);
3510bf215546Sopenharmony_ci   if (bR)
3511bf215546Sopenharmony_ci      predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc));
3512bf215546Sopenharmony_ci
3513bf215546Sopenharmony_ci   if (bb->joinAt) {
3514bf215546Sopenharmony_ci      bb->remove(bb->joinAt);
3515bf215546Sopenharmony_ci      bb->joinAt = NULL;
3516bf215546Sopenharmony_ci   }
3517bf215546Sopenharmony_ci   removeFlow(bb->getExit()); // delete the branch/join at the fork point
3518bf215546Sopenharmony_ci
3519bf215546Sopenharmony_ci   // remove potential join operations at the end of the conditional
3520bf215546Sopenharmony_ci   if (prog->getTarget()->joinAnterior) {
3521bf215546Sopenharmony_ci      bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode());
3522bf215546Sopenharmony_ci      if (bb->getEntry() && bb->getEntry()->op == OP_JOIN)
3523bf215546Sopenharmony_ci         removeFlow(bb->getEntry());
3524bf215546Sopenharmony_ci   }
3525bf215546Sopenharmony_ci
3526bf215546Sopenharmony_ci   return true;
3527bf215546Sopenharmony_ci}
3528bf215546Sopenharmony_ci
3529bf215546Sopenharmony_ci// =============================================================================
3530bf215546Sopenharmony_ci
3531bf215546Sopenharmony_ci// Fold Immediate into MAD; must be done after register allocation due to
3532bf215546Sopenharmony_ci// constraint SDST == SSRC2
3533bf215546Sopenharmony_ci// TODO:
3534bf215546Sopenharmony_ci// Does NVC0+ have other situations where this pass makes sense?
3535bf215546Sopenharmony_ciclass PostRaLoadPropagation : public Pass
3536bf215546Sopenharmony_ci{
3537bf215546Sopenharmony_ciprivate:
3538bf215546Sopenharmony_ci   virtual bool visit(Instruction *);
3539bf215546Sopenharmony_ci
3540bf215546Sopenharmony_ci   void handleMADforNV50(Instruction *);
3541bf215546Sopenharmony_ci   void handleMADforNVC0(Instruction *);
3542bf215546Sopenharmony_ci};
3543bf215546Sopenharmony_ci
3544bf215546Sopenharmony_cistatic bool
3545bf215546Sopenharmony_cipost_ra_dead(Instruction *i)
3546bf215546Sopenharmony_ci{
3547bf215546Sopenharmony_ci   for (int d = 0; i->defExists(d); ++d)
3548bf215546Sopenharmony_ci      if (i->getDef(d)->refCount())
3549bf215546Sopenharmony_ci         return false;
3550bf215546Sopenharmony_ci   return true;
3551bf215546Sopenharmony_ci}
3552bf215546Sopenharmony_ci
3553bf215546Sopenharmony_ci// Fold Immediate into MAD; must be done after register allocation due to
3554bf215546Sopenharmony_ci// constraint SDST == SSRC2
3555bf215546Sopenharmony_civoid
3556bf215546Sopenharmony_ciPostRaLoadPropagation::handleMADforNV50(Instruction *i)
3557bf215546Sopenharmony_ci{
3558bf215546Sopenharmony_ci   if (i->def(0).getFile() != FILE_GPR ||
3559bf215546Sopenharmony_ci       i->src(0).getFile() != FILE_GPR ||
3560bf215546Sopenharmony_ci       i->src(1).getFile() != FILE_GPR ||
3561bf215546Sopenharmony_ci       i->src(2).getFile() != FILE_GPR ||
3562bf215546Sopenharmony_ci       i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
3563bf215546Sopenharmony_ci      return;
3564bf215546Sopenharmony_ci
3565bf215546Sopenharmony_ci   if (i->getDef(0)->reg.data.id >= 64 ||
3566bf215546Sopenharmony_ci       i->getSrc(0)->reg.data.id >= 64)
3567bf215546Sopenharmony_ci      return;
3568bf215546Sopenharmony_ci
3569bf215546Sopenharmony_ci   if (i->flagsSrc >= 0 && i->getSrc(i->flagsSrc)->reg.data.id != 0)
3570bf215546Sopenharmony_ci      return;
3571bf215546Sopenharmony_ci
3572bf215546Sopenharmony_ci   if (i->getPredicate())
3573bf215546Sopenharmony_ci      return;
3574bf215546Sopenharmony_ci
3575bf215546Sopenharmony_ci   Value *vtmp;
3576bf215546Sopenharmony_ci   Instruction *def = i->getSrc(1)->getInsn();
3577bf215546Sopenharmony_ci
3578bf215546Sopenharmony_ci   if (def && def->op == OP_SPLIT && typeSizeof(def->sType) == 4)
3579bf215546Sopenharmony_ci      def = def->getSrc(0)->getInsn();
3580bf215546Sopenharmony_ci   if (def && def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
3581bf215546Sopenharmony_ci      vtmp = i->getSrc(1);
3582bf215546Sopenharmony_ci      if (isFloatType(i->sType)) {
3583bf215546Sopenharmony_ci         i->setSrc(1, def->getSrc(0));
3584bf215546Sopenharmony_ci      } else {
3585bf215546Sopenharmony_ci         ImmediateValue val;
3586bf215546Sopenharmony_ci         // getImmediate() has side-effects on the argument so this *shouldn't*
3587bf215546Sopenharmony_ci         // be folded into the assert()
3588bf215546Sopenharmony_ci         ASSERTED bool ret = def->src(0).getImmediate(val);
3589bf215546Sopenharmony_ci         assert(ret);
3590bf215546Sopenharmony_ci         if (i->getSrc(1)->reg.data.id & 1)
3591bf215546Sopenharmony_ci            val.reg.data.u32 >>= 16;
3592bf215546Sopenharmony_ci         val.reg.data.u32 &= 0xffff;
3593bf215546Sopenharmony_ci         i->setSrc(1, new_ImmediateValue(prog, val.reg.data.u32));
3594bf215546Sopenharmony_ci      }
3595bf215546Sopenharmony_ci
3596bf215546Sopenharmony_ci      /* There's no post-RA dead code elimination, so do it here
3597bf215546Sopenharmony_ci       * XXX: if we add more code-removing post-RA passes, we might
3598bf215546Sopenharmony_ci       *      want to create a post-RA dead-code elim pass */
3599bf215546Sopenharmony_ci      if (post_ra_dead(vtmp->getInsn())) {
3600bf215546Sopenharmony_ci         Value *src = vtmp->getInsn()->getSrc(0);
3601bf215546Sopenharmony_ci         // Careful -- splits will have already been removed from the
3602bf215546Sopenharmony_ci         // functions. Don't double-delete.
3603bf215546Sopenharmony_ci         if (vtmp->getInsn()->bb)
3604bf215546Sopenharmony_ci            delete_Instruction(prog, vtmp->getInsn());
3605bf215546Sopenharmony_ci         if (src->getInsn() && post_ra_dead(src->getInsn()))
3606bf215546Sopenharmony_ci            delete_Instruction(prog, src->getInsn());
3607bf215546Sopenharmony_ci      }
3608bf215546Sopenharmony_ci   }
3609bf215546Sopenharmony_ci}
3610bf215546Sopenharmony_ci
3611bf215546Sopenharmony_civoid
3612bf215546Sopenharmony_ciPostRaLoadPropagation::handleMADforNVC0(Instruction *i)
3613bf215546Sopenharmony_ci{
3614bf215546Sopenharmony_ci   if (i->def(0).getFile() != FILE_GPR ||
3615bf215546Sopenharmony_ci       i->src(0).getFile() != FILE_GPR ||
3616bf215546Sopenharmony_ci       i->src(1).getFile() != FILE_GPR ||
3617bf215546Sopenharmony_ci       i->src(2).getFile() != FILE_GPR ||
3618bf215546Sopenharmony_ci       i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
3619bf215546Sopenharmony_ci      return;
3620bf215546Sopenharmony_ci
3621bf215546Sopenharmony_ci   // TODO: gm107 can also do this for S32, maybe other chipsets as well
3622bf215546Sopenharmony_ci   if (i->dType != TYPE_F32)
3623bf215546Sopenharmony_ci      return;
3624bf215546Sopenharmony_ci
3625bf215546Sopenharmony_ci   if ((i->src(2).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG))
3626bf215546Sopenharmony_ci      return;
3627bf215546Sopenharmony_ci
3628bf215546Sopenharmony_ci   ImmediateValue val;
3629bf215546Sopenharmony_ci   int s;
3630bf215546Sopenharmony_ci
3631bf215546Sopenharmony_ci   if (i->src(0).getImmediate(val))
3632bf215546Sopenharmony_ci      s = 1;
3633bf215546Sopenharmony_ci   else if (i->src(1).getImmediate(val))
3634bf215546Sopenharmony_ci      s = 0;
3635bf215546Sopenharmony_ci   else
3636bf215546Sopenharmony_ci      return;
3637bf215546Sopenharmony_ci
3638bf215546Sopenharmony_ci   if ((i->src(s).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG))
3639bf215546Sopenharmony_ci      return;
3640bf215546Sopenharmony_ci
3641bf215546Sopenharmony_ci   if (s == 1)
3642bf215546Sopenharmony_ci      i->swapSources(0, 1);
3643bf215546Sopenharmony_ci
3644bf215546Sopenharmony_ci   Instruction *imm = i->getSrc(1)->getInsn();
3645bf215546Sopenharmony_ci   i->setSrc(1, imm->getSrc(0));
3646bf215546Sopenharmony_ci   if (post_ra_dead(imm))
3647bf215546Sopenharmony_ci      delete_Instruction(prog, imm);
3648bf215546Sopenharmony_ci}
3649bf215546Sopenharmony_ci
3650bf215546Sopenharmony_cibool
3651bf215546Sopenharmony_ciPostRaLoadPropagation::visit(Instruction *i)
3652bf215546Sopenharmony_ci{
3653bf215546Sopenharmony_ci   switch (i->op) {
3654bf215546Sopenharmony_ci   case OP_FMA:
3655bf215546Sopenharmony_ci   case OP_MAD:
3656bf215546Sopenharmony_ci      if (prog->getTarget()->getChipset() < 0xc0)
3657bf215546Sopenharmony_ci         handleMADforNV50(i);
3658bf215546Sopenharmony_ci      else
3659bf215546Sopenharmony_ci         handleMADforNVC0(i);
3660bf215546Sopenharmony_ci      break;
3661bf215546Sopenharmony_ci   default:
3662bf215546Sopenharmony_ci      break;
3663bf215546Sopenharmony_ci   }
3664bf215546Sopenharmony_ci
3665bf215546Sopenharmony_ci   return true;
3666bf215546Sopenharmony_ci}
3667bf215546Sopenharmony_ci
3668bf215546Sopenharmony_ci// =============================================================================
3669bf215546Sopenharmony_ci
3670bf215546Sopenharmony_ci// Common subexpression elimination. Stupid O^2 implementation.
3671bf215546Sopenharmony_ciclass LocalCSE : public Pass
3672bf215546Sopenharmony_ci{
3673bf215546Sopenharmony_ciprivate:
3674bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
3675bf215546Sopenharmony_ci
3676bf215546Sopenharmony_ci   inline bool tryReplace(Instruction **, Instruction *);
3677bf215546Sopenharmony_ci
3678bf215546Sopenharmony_ci   DLList ops[OP_LAST + 1];
3679bf215546Sopenharmony_ci};
3680bf215546Sopenharmony_ci
3681bf215546Sopenharmony_ciclass GlobalCSE : public Pass
3682bf215546Sopenharmony_ci{
3683bf215546Sopenharmony_ciprivate:
3684bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
3685bf215546Sopenharmony_ci};
3686bf215546Sopenharmony_ci
3687bf215546Sopenharmony_cibool
3688bf215546Sopenharmony_ciInstruction::isActionEqual(const Instruction *that) const
3689bf215546Sopenharmony_ci{
3690bf215546Sopenharmony_ci   if (this->op != that->op ||
3691bf215546Sopenharmony_ci       this->dType != that->dType ||
3692bf215546Sopenharmony_ci       this->sType != that->sType)
3693bf215546Sopenharmony_ci      return false;
3694bf215546Sopenharmony_ci   if (this->cc != that->cc)
3695bf215546Sopenharmony_ci      return false;
3696bf215546Sopenharmony_ci
3697bf215546Sopenharmony_ci   if (this->asTex()) {
3698bf215546Sopenharmony_ci      if (memcmp(&this->asTex()->tex,
3699bf215546Sopenharmony_ci                 &that->asTex()->tex,
3700bf215546Sopenharmony_ci                 sizeof(this->asTex()->tex)))
3701bf215546Sopenharmony_ci         return false;
3702bf215546Sopenharmony_ci   } else
3703bf215546Sopenharmony_ci   if (this->asCmp()) {
3704bf215546Sopenharmony_ci      if (this->asCmp()->setCond != that->asCmp()->setCond)
3705bf215546Sopenharmony_ci         return false;
3706bf215546Sopenharmony_ci   } else
3707bf215546Sopenharmony_ci   if (this->asFlow()) {
3708bf215546Sopenharmony_ci      return false;
3709bf215546Sopenharmony_ci   } else
3710bf215546Sopenharmony_ci   if (this->op == OP_PHI && this->bb != that->bb) {
3711bf215546Sopenharmony_ci      /* TODO: we could probably be a bit smarter here by following the
3712bf215546Sopenharmony_ci       * control flow, but honestly, it is quite painful to check */
3713bf215546Sopenharmony_ci      return false;
3714bf215546Sopenharmony_ci   } else {
3715bf215546Sopenharmony_ci      if (this->ipa != that->ipa ||
3716bf215546Sopenharmony_ci          this->lanes != that->lanes ||
3717bf215546Sopenharmony_ci          this->perPatch != that->perPatch)
3718bf215546Sopenharmony_ci         return false;
3719bf215546Sopenharmony_ci      if (this->postFactor != that->postFactor)
3720bf215546Sopenharmony_ci         return false;
3721bf215546Sopenharmony_ci   }
3722bf215546Sopenharmony_ci
3723bf215546Sopenharmony_ci   if (this->subOp != that->subOp ||
3724bf215546Sopenharmony_ci       this->saturate != that->saturate ||
3725bf215546Sopenharmony_ci       this->rnd != that->rnd ||
3726bf215546Sopenharmony_ci       this->ftz != that->ftz ||
3727bf215546Sopenharmony_ci       this->dnz != that->dnz ||
3728bf215546Sopenharmony_ci       this->cache != that->cache ||
3729bf215546Sopenharmony_ci       this->mask != that->mask)
3730bf215546Sopenharmony_ci      return false;
3731bf215546Sopenharmony_ci
3732bf215546Sopenharmony_ci   return true;
3733bf215546Sopenharmony_ci}
3734bf215546Sopenharmony_ci
3735bf215546Sopenharmony_cibool
3736bf215546Sopenharmony_ciInstruction::isResultEqual(const Instruction *that) const
3737bf215546Sopenharmony_ci{
3738bf215546Sopenharmony_ci   unsigned int d, s;
3739bf215546Sopenharmony_ci
3740bf215546Sopenharmony_ci   // NOTE: location of discard only affects tex with liveOnly and quadops
3741bf215546Sopenharmony_ci   if (!this->defExists(0) && this->op != OP_DISCARD)
3742bf215546Sopenharmony_ci      return false;
3743bf215546Sopenharmony_ci
3744bf215546Sopenharmony_ci   if (!isActionEqual(that))
3745bf215546Sopenharmony_ci      return false;
3746bf215546Sopenharmony_ci
3747bf215546Sopenharmony_ci   if (this->predSrc != that->predSrc)
3748bf215546Sopenharmony_ci      return false;
3749bf215546Sopenharmony_ci
3750bf215546Sopenharmony_ci   for (d = 0; this->defExists(d); ++d) {
3751bf215546Sopenharmony_ci      if (!that->defExists(d) ||
3752bf215546Sopenharmony_ci          !this->getDef(d)->equals(that->getDef(d), false))
3753bf215546Sopenharmony_ci         return false;
3754bf215546Sopenharmony_ci   }
3755bf215546Sopenharmony_ci   if (that->defExists(d))
3756bf215546Sopenharmony_ci      return false;
3757bf215546Sopenharmony_ci
3758bf215546Sopenharmony_ci   for (s = 0; this->srcExists(s); ++s) {
3759bf215546Sopenharmony_ci      if (!that->srcExists(s))
3760bf215546Sopenharmony_ci         return false;
3761bf215546Sopenharmony_ci      if (this->src(s).mod != that->src(s).mod)
3762bf215546Sopenharmony_ci         return false;
3763bf215546Sopenharmony_ci      if (!this->getSrc(s)->equals(that->getSrc(s), true))
3764bf215546Sopenharmony_ci         return false;
3765bf215546Sopenharmony_ci   }
3766bf215546Sopenharmony_ci   if (that->srcExists(s))
3767bf215546Sopenharmony_ci      return false;
3768bf215546Sopenharmony_ci
3769bf215546Sopenharmony_ci   if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) {
3770bf215546Sopenharmony_ci      switch (src(0).getFile()) {
3771bf215546Sopenharmony_ci      case FILE_MEMORY_CONST:
3772bf215546Sopenharmony_ci      case FILE_SHADER_INPUT:
3773bf215546Sopenharmony_ci         return true;
3774bf215546Sopenharmony_ci      case FILE_SHADER_OUTPUT:
3775bf215546Sopenharmony_ci         return bb->getProgram()->getType() == Program::TYPE_TESSELLATION_EVAL;
3776bf215546Sopenharmony_ci      default:
3777bf215546Sopenharmony_ci         return false;
3778bf215546Sopenharmony_ci      }
3779bf215546Sopenharmony_ci   }
3780bf215546Sopenharmony_ci
3781bf215546Sopenharmony_ci   return true;
3782bf215546Sopenharmony_ci}
3783bf215546Sopenharmony_ci
3784bf215546Sopenharmony_ci// pull through common expressions from different in-blocks
3785bf215546Sopenharmony_cibool
3786bf215546Sopenharmony_ciGlobalCSE::visit(BasicBlock *bb)
3787bf215546Sopenharmony_ci{
3788bf215546Sopenharmony_ci   Instruction *phi, *next, *ik;
3789bf215546Sopenharmony_ci   int s;
3790bf215546Sopenharmony_ci
3791bf215546Sopenharmony_ci   // TODO: maybe do this with OP_UNION, too
3792bf215546Sopenharmony_ci
3793bf215546Sopenharmony_ci   for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) {
3794bf215546Sopenharmony_ci      next = phi->next;
3795bf215546Sopenharmony_ci      if (phi->getSrc(0)->refCount() > 1)
3796bf215546Sopenharmony_ci         continue;
3797bf215546Sopenharmony_ci      ik = phi->getSrc(0)->getInsn();
3798bf215546Sopenharmony_ci      if (!ik)
3799bf215546Sopenharmony_ci         continue; // probably a function input
3800bf215546Sopenharmony_ci      if (ik->defCount(0xff) > 1)
3801bf215546Sopenharmony_ci         continue; // too painful to check if we can really push this forward
3802bf215546Sopenharmony_ci      for (s = 1; phi->srcExists(s); ++s) {
3803bf215546Sopenharmony_ci         if (phi->getSrc(s)->refCount() > 1)
3804bf215546Sopenharmony_ci            break;
3805bf215546Sopenharmony_ci         if (!phi->getSrc(s)->getInsn() ||
3806bf215546Sopenharmony_ci             !phi->getSrc(s)->getInsn()->isResultEqual(ik))
3807bf215546Sopenharmony_ci            break;
3808bf215546Sopenharmony_ci      }
3809bf215546Sopenharmony_ci      if (!phi->srcExists(s)) {
3810bf215546Sopenharmony_ci         assert(ik->op != OP_PHI);
3811bf215546Sopenharmony_ci         Instruction *entry = bb->getEntry();
3812bf215546Sopenharmony_ci         ik->bb->remove(ik);
3813bf215546Sopenharmony_ci         if (!entry || entry->op != OP_JOIN)
3814bf215546Sopenharmony_ci            bb->insertHead(ik);
3815bf215546Sopenharmony_ci         else
3816bf215546Sopenharmony_ci            bb->insertAfter(entry, ik);
3817bf215546Sopenharmony_ci         ik->setDef(0, phi->getDef(0));
3818bf215546Sopenharmony_ci         delete_Instruction(prog, phi);
3819bf215546Sopenharmony_ci      }
3820bf215546Sopenharmony_ci   }
3821bf215546Sopenharmony_ci
3822bf215546Sopenharmony_ci   return true;
3823bf215546Sopenharmony_ci}
3824bf215546Sopenharmony_ci
3825bf215546Sopenharmony_cibool
3826bf215546Sopenharmony_ciLocalCSE::tryReplace(Instruction **ptr, Instruction *i)
3827bf215546Sopenharmony_ci{
3828bf215546Sopenharmony_ci   Instruction *old = *ptr;
3829bf215546Sopenharmony_ci
3830bf215546Sopenharmony_ci   // TODO: maybe relax this later (causes trouble with OP_UNION)
3831bf215546Sopenharmony_ci   if (i->isPredicated())
3832bf215546Sopenharmony_ci      return false;
3833bf215546Sopenharmony_ci
3834bf215546Sopenharmony_ci   if (!old->isResultEqual(i))
3835bf215546Sopenharmony_ci      return false;
3836bf215546Sopenharmony_ci
3837bf215546Sopenharmony_ci   for (int d = 0; old->defExists(d); ++d)
3838bf215546Sopenharmony_ci      old->def(d).replace(i->getDef(d), false);
3839bf215546Sopenharmony_ci   delete_Instruction(prog, old);
3840bf215546Sopenharmony_ci   *ptr = NULL;
3841bf215546Sopenharmony_ci   return true;
3842bf215546Sopenharmony_ci}
3843bf215546Sopenharmony_ci
3844bf215546Sopenharmony_cibool
3845bf215546Sopenharmony_ciLocalCSE::visit(BasicBlock *bb)
3846bf215546Sopenharmony_ci{
3847bf215546Sopenharmony_ci   unsigned int replaced;
3848bf215546Sopenharmony_ci
3849bf215546Sopenharmony_ci   do {
3850bf215546Sopenharmony_ci      Instruction *ir, *next;
3851bf215546Sopenharmony_ci
3852bf215546Sopenharmony_ci      replaced = 0;
3853bf215546Sopenharmony_ci
3854bf215546Sopenharmony_ci      // will need to know the order of instructions
3855bf215546Sopenharmony_ci      int serial = 0;
3856bf215546Sopenharmony_ci      for (ir = bb->getFirst(); ir; ir = ir->next)
3857bf215546Sopenharmony_ci         ir->serial = serial++;
3858bf215546Sopenharmony_ci
3859bf215546Sopenharmony_ci      for (ir = bb->getFirst(); ir; ir = next) {
3860bf215546Sopenharmony_ci         int s;
3861bf215546Sopenharmony_ci         Value *src = NULL;
3862bf215546Sopenharmony_ci
3863bf215546Sopenharmony_ci         next = ir->next;
3864bf215546Sopenharmony_ci
3865bf215546Sopenharmony_ci         if (ir->fixed) {
3866bf215546Sopenharmony_ci            ops[ir->op].insert(ir);
3867bf215546Sopenharmony_ci            continue;
3868bf215546Sopenharmony_ci         }
3869bf215546Sopenharmony_ci
3870bf215546Sopenharmony_ci         for (s = 0; ir->srcExists(s); ++s)
3871bf215546Sopenharmony_ci            if (ir->getSrc(s)->asLValue())
3872bf215546Sopenharmony_ci               if (!src || ir->getSrc(s)->refCount() < src->refCount())
3873bf215546Sopenharmony_ci                  src = ir->getSrc(s);
3874bf215546Sopenharmony_ci
3875bf215546Sopenharmony_ci         if (src) {
3876bf215546Sopenharmony_ci            for (Value::UseIterator it = src->uses.begin();
3877bf215546Sopenharmony_ci                 it != src->uses.end(); ++it) {
3878bf215546Sopenharmony_ci               Instruction *ik = (*it)->getInsn();
3879bf215546Sopenharmony_ci               if (ik && ik->bb == ir->bb && ik->serial < ir->serial)
3880bf215546Sopenharmony_ci                  if (tryReplace(&ir, ik))
3881bf215546Sopenharmony_ci                     break;
3882bf215546Sopenharmony_ci            }
3883bf215546Sopenharmony_ci         } else {
3884bf215546Sopenharmony_ci            DLLIST_FOR_EACH(&ops[ir->op], iter)
3885bf215546Sopenharmony_ci            {
3886bf215546Sopenharmony_ci               Instruction *ik = reinterpret_cast<Instruction *>(iter.get());
3887bf215546Sopenharmony_ci               if (tryReplace(&ir, ik))
3888bf215546Sopenharmony_ci                  break;
3889bf215546Sopenharmony_ci            }
3890bf215546Sopenharmony_ci         }
3891bf215546Sopenharmony_ci
3892bf215546Sopenharmony_ci         if (ir)
3893bf215546Sopenharmony_ci            ops[ir->op].insert(ir);
3894bf215546Sopenharmony_ci         else
3895bf215546Sopenharmony_ci            ++replaced;
3896bf215546Sopenharmony_ci      }
3897bf215546Sopenharmony_ci      for (unsigned int i = 0; i <= OP_LAST; ++i)
3898bf215546Sopenharmony_ci         ops[i].clear();
3899bf215546Sopenharmony_ci
3900bf215546Sopenharmony_ci   } while (replaced);
3901bf215546Sopenharmony_ci
3902bf215546Sopenharmony_ci   return true;
3903bf215546Sopenharmony_ci}
3904bf215546Sopenharmony_ci
3905bf215546Sopenharmony_ci// =============================================================================
3906bf215546Sopenharmony_ci
3907bf215546Sopenharmony_ci// Remove computations of unused values.
3908bf215546Sopenharmony_ciclass DeadCodeElim : public Pass
3909bf215546Sopenharmony_ci{
3910bf215546Sopenharmony_cipublic:
3911bf215546Sopenharmony_ci   DeadCodeElim() : deadCount(0) {}
3912bf215546Sopenharmony_ci   bool buryAll(Program *);
3913bf215546Sopenharmony_ci
3914bf215546Sopenharmony_ciprivate:
3915bf215546Sopenharmony_ci   virtual bool visit(BasicBlock *);
3916bf215546Sopenharmony_ci
3917bf215546Sopenharmony_ci   void checkSplitLoad(Instruction *ld); // for partially dead loads
3918bf215546Sopenharmony_ci
3919bf215546Sopenharmony_ci   unsigned int deadCount;
3920bf215546Sopenharmony_ci};
3921bf215546Sopenharmony_ci
3922bf215546Sopenharmony_cibool
3923bf215546Sopenharmony_ciDeadCodeElim::buryAll(Program *prog)
3924bf215546Sopenharmony_ci{
3925bf215546Sopenharmony_ci   do {
3926bf215546Sopenharmony_ci      deadCount = 0;
3927bf215546Sopenharmony_ci      if (!this->run(prog, false, false))
3928bf215546Sopenharmony_ci         return false;
3929bf215546Sopenharmony_ci   } while (deadCount);
3930bf215546Sopenharmony_ci
3931bf215546Sopenharmony_ci   return true;
3932bf215546Sopenharmony_ci}
3933bf215546Sopenharmony_ci
3934bf215546Sopenharmony_cibool
3935bf215546Sopenharmony_ciDeadCodeElim::visit(BasicBlock *bb)
3936bf215546Sopenharmony_ci{
3937bf215546Sopenharmony_ci   Instruction *prev;
3938bf215546Sopenharmony_ci
3939bf215546Sopenharmony_ci   for (Instruction *i = bb->getExit(); i; i = prev) {
3940bf215546Sopenharmony_ci      prev = i->prev;
3941bf215546Sopenharmony_ci      if (i->isDead()) {
3942bf215546Sopenharmony_ci         ++deadCount;
3943bf215546Sopenharmony_ci         delete_Instruction(prog, i);
3944bf215546Sopenharmony_ci      } else
3945bf215546Sopenharmony_ci      if (i->defExists(1) &&
3946bf215546Sopenharmony_ci          i->subOp == 0 &&
3947bf215546Sopenharmony_ci          (i->op == OP_VFETCH || i->op == OP_LOAD)) {
3948bf215546Sopenharmony_ci         checkSplitLoad(i);
3949bf215546Sopenharmony_ci      } else
3950bf215546Sopenharmony_ci      if (i->defExists(0) && !i->getDef(0)->refCount()) {
3951bf215546Sopenharmony_ci         if (i->op == OP_ATOM ||
3952bf215546Sopenharmony_ci             i->op == OP_SUREDP ||
3953bf215546Sopenharmony_ci             i->op == OP_SUREDB) {
3954bf215546Sopenharmony_ci            const Target *targ = prog->getTarget();
3955bf215546Sopenharmony_ci            if (targ->getChipset() >= NVISA_GF100_CHIPSET ||
3956bf215546Sopenharmony_ci                i->subOp != NV50_IR_SUBOP_ATOM_CAS)
3957bf215546Sopenharmony_ci               i->setDef(0, NULL);
3958bf215546Sopenharmony_ci            if (i->op == OP_ATOM && i->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3959bf215546Sopenharmony_ci               i->cache = CACHE_CV;
3960bf215546Sopenharmony_ci               i->op = OP_STORE;
3961bf215546Sopenharmony_ci               i->subOp = 0;
3962bf215546Sopenharmony_ci            }
3963bf215546Sopenharmony_ci         } else if (i->op == OP_LOAD && i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) {
3964bf215546Sopenharmony_ci            i->setDef(0, i->getDef(1));
3965bf215546Sopenharmony_ci            i->setDef(1, NULL);
3966bf215546Sopenharmony_ci         }
3967bf215546Sopenharmony_ci      }
3968bf215546Sopenharmony_ci   }
3969bf215546Sopenharmony_ci   return true;
3970bf215546Sopenharmony_ci}
3971bf215546Sopenharmony_ci
3972bf215546Sopenharmony_ci// Each load can go into up to 4 destinations, any of which might potentially
3973bf215546Sopenharmony_ci// be dead (i.e. a hole). These can always be split into 2 loads, independent
3974bf215546Sopenharmony_ci// of where the holes are. We find the first contiguous region, put it into
3975bf215546Sopenharmony_ci// the first load, and then put the second contiguous region into the second
3976bf215546Sopenharmony_ci// load. There can be at most 2 contiguous regions.
3977bf215546Sopenharmony_ci//
3978bf215546Sopenharmony_ci// Note that there are some restrictions, for example it's not possible to do
3979bf215546Sopenharmony_ci// a 64-bit load that's not 64-bit aligned, so such a load has to be split
3980bf215546Sopenharmony_ci// up. Also hardware doesn't support 96-bit loads, so those also have to be
3981bf215546Sopenharmony_ci// split into a 64-bit and 32-bit load.
3982bf215546Sopenharmony_civoid
3983bf215546Sopenharmony_ciDeadCodeElim::checkSplitLoad(Instruction *ld1)
3984bf215546Sopenharmony_ci{
3985bf215546Sopenharmony_ci   Instruction *ld2 = NULL; // can get at most 2 loads
3986bf215546Sopenharmony_ci   Value *def1[4];
3987bf215546Sopenharmony_ci   Value *def2[4];
3988bf215546Sopenharmony_ci   int32_t addr1, addr2;
3989bf215546Sopenharmony_ci   int32_t size1, size2;
3990bf215546Sopenharmony_ci   int d, n1, n2;
3991bf215546Sopenharmony_ci   uint32_t mask = 0xffffffff;
3992bf215546Sopenharmony_ci
3993bf215546Sopenharmony_ci   for (d = 0; ld1->defExists(d); ++d)
3994bf215546Sopenharmony_ci      if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0)
3995bf215546Sopenharmony_ci         mask &= ~(1 << d);
3996bf215546Sopenharmony_ci   if (mask == 0xffffffff)
3997bf215546Sopenharmony_ci      return;
3998bf215546Sopenharmony_ci
3999bf215546Sopenharmony_ci   addr1 = ld1->getSrc(0)->reg.data.offset;
4000bf215546Sopenharmony_ci   n1 = n2 = 0;
4001bf215546Sopenharmony_ci   size1 = size2 = 0;
4002bf215546Sopenharmony_ci
4003bf215546Sopenharmony_ci   // Compute address/width for first load
4004bf215546Sopenharmony_ci   for (d = 0; ld1->defExists(d); ++d) {
4005bf215546Sopenharmony_ci      if (mask & (1 << d)) {
4006bf215546Sopenharmony_ci         if (size1 && (addr1 & 0x7))
4007bf215546Sopenharmony_ci            break;
4008bf215546Sopenharmony_ci         def1[n1] = ld1->getDef(d);
4009bf215546Sopenharmony_ci         size1 += def1[n1++]->reg.size;
4010bf215546Sopenharmony_ci      } else
4011bf215546Sopenharmony_ci      if (!n1) {
4012bf215546Sopenharmony_ci         addr1 += ld1->getDef(d)->reg.size;
4013bf215546Sopenharmony_ci      } else {
4014bf215546Sopenharmony_ci         break;
4015bf215546Sopenharmony_ci      }
4016bf215546Sopenharmony_ci   }
4017bf215546Sopenharmony_ci
4018bf215546Sopenharmony_ci   // Scale back the size of the first load until it can be loaded. This
4019bf215546Sopenharmony_ci   // typically happens for TYPE_B96 loads.
4020bf215546Sopenharmony_ci   while (n1 &&
4021bf215546Sopenharmony_ci          !prog->getTarget()->isAccessSupported(ld1->getSrc(0)->reg.file,
4022bf215546Sopenharmony_ci                                                typeOfSize(size1))) {
4023bf215546Sopenharmony_ci      size1 -= def1[--n1]->reg.size;
4024bf215546Sopenharmony_ci      d--;
4025bf215546Sopenharmony_ci   }
4026bf215546Sopenharmony_ci
4027bf215546Sopenharmony_ci   // Compute address/width for second load
4028bf215546Sopenharmony_ci   for (addr2 = addr1 + size1; ld1->defExists(d); ++d) {
4029bf215546Sopenharmony_ci      if (mask & (1 << d)) {
4030bf215546Sopenharmony_ci         assert(!size2 || !(addr2 & 0x7));
4031bf215546Sopenharmony_ci         def2[n2] = ld1->getDef(d);
4032bf215546Sopenharmony_ci         size2 += def2[n2++]->reg.size;
4033bf215546Sopenharmony_ci      } else if (!n2) {
4034bf215546Sopenharmony_ci         assert(!n2);
4035bf215546Sopenharmony_ci         addr2 += ld1->getDef(d)->reg.size;
4036bf215546Sopenharmony_ci      } else {
4037bf215546Sopenharmony_ci         break;
4038bf215546Sopenharmony_ci      }
4039bf215546Sopenharmony_ci   }
4040bf215546Sopenharmony_ci
4041bf215546Sopenharmony_ci   // Make sure that we've processed all the values
4042bf215546Sopenharmony_ci   for (; ld1->defExists(d); ++d)
4043bf215546Sopenharmony_ci      assert(!(mask & (1 << d)));
4044bf215546Sopenharmony_ci
4045bf215546Sopenharmony_ci   updateLdStOffset(ld1, addr1, func);
4046bf215546Sopenharmony_ci   ld1->setType(typeOfSize(size1));
4047bf215546Sopenharmony_ci   for (d = 0; d < 4; ++d)
4048bf215546Sopenharmony_ci      ld1->setDef(d, (d < n1) ? def1[d] : NULL);
4049bf215546Sopenharmony_ci
4050bf215546Sopenharmony_ci   if (!n2)
4051bf215546Sopenharmony_ci      return;
4052bf215546Sopenharmony_ci
4053bf215546Sopenharmony_ci   ld2 = cloneShallow(func, ld1);
4054bf215546Sopenharmony_ci   updateLdStOffset(ld2, addr2, func);
4055bf215546Sopenharmony_ci   ld2->setType(typeOfSize(size2));
4056bf215546Sopenharmony_ci   for (d = 0; d < 4; ++d)
4057bf215546Sopenharmony_ci      ld2->setDef(d, (d < n2) ? def2[d] : NULL);
4058bf215546Sopenharmony_ci
4059bf215546Sopenharmony_ci   ld1->bb->insertAfter(ld1, ld2);
4060bf215546Sopenharmony_ci}
4061bf215546Sopenharmony_ci
4062bf215546Sopenharmony_ci// =============================================================================
4063bf215546Sopenharmony_ci
// RUN_PASS(l, n, f): if the enclosing function's `level` is at least `l`,
// default-construct a pass object of class `n` and call its member `f` with
// `this`. When the call yields false, the enclosing function returns false.
// With NV50_IR_DEBUG_VERBOSE set in dbgFlags the pass class name is logged
// first.
//
// The body is wrapped in do { ... } while (0) so that `RUN_PASS(...);` is a
// single statement and stays correct as the unbraced body of an if/else.
#define RUN_PASS(l, n, f)                          \
   do {                                            \
      if (level >= (l)) {                          \
         if (dbgFlags & NV50_IR_DEBUG_VERBOSE)     \
            INFO("PEEPHOLE: %s\n", #n);            \
         n pass;                                   \
         if (!pass.f(this))                        \
            return false;                          \
      }                                            \
   } while (0)
4072bf215546Sopenharmony_ci
4073bf215546Sopenharmony_cibool
4074bf215546Sopenharmony_ciProgram::optimizeSSA(int level)
4075bf215546Sopenharmony_ci{
4076bf215546Sopenharmony_ci   RUN_PASS(1, DeadCodeElim, buryAll);
4077bf215546Sopenharmony_ci   RUN_PASS(1, CopyPropagation, run);
4078bf215546Sopenharmony_ci   RUN_PASS(1, MergeSplits, run);
4079bf215546Sopenharmony_ci   RUN_PASS(2, GlobalCSE, run);
4080bf215546Sopenharmony_ci   RUN_PASS(1, LocalCSE, run);
4081bf215546Sopenharmony_ci   RUN_PASS(2, AlgebraicOpt, run);
4082bf215546Sopenharmony_ci   RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks
4083bf215546Sopenharmony_ci   RUN_PASS(1, ConstantFolding, foldAll);
4084bf215546Sopenharmony_ci   RUN_PASS(0, Split64BitOpPreRA, run);
4085bf215546Sopenharmony_ci   RUN_PASS(2, LateAlgebraicOpt, run);
4086bf215546Sopenharmony_ci   RUN_PASS(1, LoadPropagation, run);
4087bf215546Sopenharmony_ci   RUN_PASS(1, IndirectPropagation, run);
4088bf215546Sopenharmony_ci   RUN_PASS(2, MemoryOpt, run);
4089bf215546Sopenharmony_ci   RUN_PASS(2, LocalCSE, run);
4090bf215546Sopenharmony_ci   RUN_PASS(0, DeadCodeElim, buryAll);
4091bf215546Sopenharmony_ci
4092bf215546Sopenharmony_ci   return true;
4093bf215546Sopenharmony_ci}
4094bf215546Sopenharmony_ci
4095bf215546Sopenharmony_cibool
4096bf215546Sopenharmony_ciProgram::optimizePostRA(int level)
4097bf215546Sopenharmony_ci{
4098bf215546Sopenharmony_ci   RUN_PASS(2, FlatteningPass, run);
4099bf215546Sopenharmony_ci   RUN_PASS(2, PostRaLoadPropagation, run);
4100bf215546Sopenharmony_ci
4101bf215546Sopenharmony_ci   return true;
4102bf215546Sopenharmony_ci}
4103bf215546Sopenharmony_ci
4104bf215546Sopenharmony_ci}
4105