1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2011 Christoph Bumiller 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 12bf215546Sopenharmony_ci * all copies or substantial portions of the Software. 13bf215546Sopenharmony_ci * 14bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 21bf215546Sopenharmony_ci */ 22bf215546Sopenharmony_ci 23bf215546Sopenharmony_ci#include "nv50_ir.h" 24bf215546Sopenharmony_ci#include "nv50_ir_target.h" 25bf215546Sopenharmony_ci#include "nv50_ir_build_util.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ciextern "C" { 28bf215546Sopenharmony_ci#include "util/u_math.h" 29bf215546Sopenharmony_ci} 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cinamespace nv50_ir { 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cibool 34bf215546Sopenharmony_ciInstruction::isNop() const 35bf215546Sopenharmony_ci{ 36bf215546Sopenharmony_ci if (op == OP_PHI || op == OP_SPLIT || op == OP_MERGE || op == OP_CONSTRAINT) 37bf215546Sopenharmony_ci return true; 38bf215546Sopenharmony_ci if (terminator || join) // XXX: should terminator imply flow ? 39bf215546Sopenharmony_ci return false; 40bf215546Sopenharmony_ci if (op == OP_ATOM) 41bf215546Sopenharmony_ci return false; 42bf215546Sopenharmony_ci if (!fixed && op == OP_NOP) 43bf215546Sopenharmony_ci return true; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci if (defExists(0) && def(0).rep()->reg.data.id < 0) { 46bf215546Sopenharmony_ci for (int d = 1; defExists(d); ++d) 47bf215546Sopenharmony_ci if (def(d).rep()->reg.data.id >= 0) 48bf215546Sopenharmony_ci WARN("part of vector result is unused !\n"); 49bf215546Sopenharmony_ci return true; 50bf215546Sopenharmony_ci } 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci if (op == OP_MOV || op == OP_UNION) { 53bf215546Sopenharmony_ci if (!getDef(0)->equals(getSrc(0))) 54bf215546Sopenharmony_ci return false; 55bf215546Sopenharmony_ci if (op == OP_UNION) 56bf215546Sopenharmony_ci if (!getDef(0)->equals(getSrc(1))) 57bf215546Sopenharmony_ci return false; 58bf215546Sopenharmony_ci return true; 59bf215546Sopenharmony_ci } 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci return false; 62bf215546Sopenharmony_ci} 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_cibool Instruction::isDead() const 65bf215546Sopenharmony_ci{ 66bf215546Sopenharmony_ci if (op == OP_STORE || 67bf215546Sopenharmony_ci op == OP_EXPORT || 68bf215546Sopenharmony_ci op == OP_ATOM || 69bf215546Sopenharmony_ci op == OP_SUSTB || op == OP_SUSTP || op == OP_SUREDP || op == OP_SUREDB || 70bf215546Sopenharmony_ci op == OP_WRSV) 71bf215546Sopenharmony_ci return false; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci for (int d = 0; defExists(d); ++d) 74bf215546Sopenharmony_ci if (getDef(d)->refCount() || getDef(d)->reg.data.id >= 0) 75bf215546Sopenharmony_ci return false; 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci if (terminator || asFlow()) 78bf215546Sopenharmony_ci return false; 79bf215546Sopenharmony_ci if (fixed) 80bf215546Sopenharmony_ci return false; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci return true; 83bf215546Sopenharmony_ci}; 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci// ============================================================================= 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ciclass CopyPropagation : public Pass 88bf215546Sopenharmony_ci{ 89bf215546Sopenharmony_ciprivate: 90bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 91bf215546Sopenharmony_ci}; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci// Propagate all MOVs forward to make subsequent optimization easier, except if 94bf215546Sopenharmony_ci// the sources stem from a phi, in which case we don't want to mess up potential 95bf215546Sopenharmony_ci// swaps $rX <-> $rY, i.e. do not create live range overlaps of phi src and def. 96bf215546Sopenharmony_cibool 97bf215546Sopenharmony_ciCopyPropagation::visit(BasicBlock *bb) 98bf215546Sopenharmony_ci{ 99bf215546Sopenharmony_ci Instruction *mov, *si, *next; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci for (mov = bb->getEntry(); mov; mov = next) { 102bf215546Sopenharmony_ci next = mov->next; 103bf215546Sopenharmony_ci if (mov->op != OP_MOV || mov->fixed || !mov->getSrc(0)->asLValue()) 104bf215546Sopenharmony_ci continue; 105bf215546Sopenharmony_ci if (mov->getPredicate()) 106bf215546Sopenharmony_ci continue; 107bf215546Sopenharmony_ci if (mov->def(0).getFile() != mov->src(0).getFile()) 108bf215546Sopenharmony_ci continue; 109bf215546Sopenharmony_ci si = mov->getSrc(0)->getInsn(); 110bf215546Sopenharmony_ci if (mov->getDef(0)->reg.data.id < 0 && si && si->op != OP_PHI) { 111bf215546Sopenharmony_ci // propagate 112bf215546Sopenharmony_ci mov->def(0).replace(mov->getSrc(0), false); 113bf215546Sopenharmony_ci delete_Instruction(prog, mov); 114bf215546Sopenharmony_ci } 115bf215546Sopenharmony_ci } 116bf215546Sopenharmony_ci return true; 117bf215546Sopenharmony_ci} 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci// ============================================================================= 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ciclass MergeSplits : public Pass 122bf215546Sopenharmony_ci{ 123bf215546Sopenharmony_ciprivate: 124bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 125bf215546Sopenharmony_ci}; 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci// For SPLIT / MERGE pairs that operate on the same registers, replace the 128bf215546Sopenharmony_ci// post-merge def with the SPLIT's source. 129bf215546Sopenharmony_cibool 130bf215546Sopenharmony_ciMergeSplits::visit(BasicBlock *bb) 131bf215546Sopenharmony_ci{ 132bf215546Sopenharmony_ci Instruction *i, *next, *si; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci for (i = bb->getEntry(); i; i = next) { 135bf215546Sopenharmony_ci next = i->next; 136bf215546Sopenharmony_ci if (i->op != OP_MERGE || typeSizeof(i->dType) != 8) 137bf215546Sopenharmony_ci continue; 138bf215546Sopenharmony_ci si = i->getSrc(0)->getInsn(); 139bf215546Sopenharmony_ci if (si->op != OP_SPLIT || si != i->getSrc(1)->getInsn()) 140bf215546Sopenharmony_ci continue; 141bf215546Sopenharmony_ci i->def(0).replace(si->getSrc(0), false); 142bf215546Sopenharmony_ci delete_Instruction(prog, i); 143bf215546Sopenharmony_ci } 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci return true; 146bf215546Sopenharmony_ci} 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci// ============================================================================= 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ciclass LoadPropagation : public Pass 151bf215546Sopenharmony_ci{ 152bf215546Sopenharmony_ciprivate: 153bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci void checkSwapSrc01(Instruction *); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci bool isCSpaceLoad(Instruction *); 158bf215546Sopenharmony_ci bool isImmdLoad(Instruction *); 159bf215546Sopenharmony_ci bool isAttribOrSharedLoad(Instruction *); 160bf215546Sopenharmony_ci}; 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_cibool 163bf215546Sopenharmony_ciLoadPropagation::isCSpaceLoad(Instruction *ld) 164bf215546Sopenharmony_ci{ 165bf215546Sopenharmony_ci return ld && ld->op == OP_LOAD && ld->src(0).getFile() == FILE_MEMORY_CONST; 166bf215546Sopenharmony_ci} 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_cibool 169bf215546Sopenharmony_ciLoadPropagation::isImmdLoad(Instruction *ld) 170bf215546Sopenharmony_ci{ 171bf215546Sopenharmony_ci if (!ld || (ld->op != OP_MOV) || 172bf215546Sopenharmony_ci ((typeSizeof(ld->dType) != 4) && (typeSizeof(ld->dType) != 8))) 173bf215546Sopenharmony_ci return false; 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci // A 0 can be replaced with a register, so it doesn't count as an immediate. 176bf215546Sopenharmony_ci ImmediateValue val; 177bf215546Sopenharmony_ci return ld->src(0).getImmediate(val) && !val.isInteger(0); 178bf215546Sopenharmony_ci} 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_cibool 181bf215546Sopenharmony_ciLoadPropagation::isAttribOrSharedLoad(Instruction *ld) 182bf215546Sopenharmony_ci{ 183bf215546Sopenharmony_ci return ld && 184bf215546Sopenharmony_ci (ld->op == OP_VFETCH || 185bf215546Sopenharmony_ci (ld->op == OP_LOAD && 186bf215546Sopenharmony_ci (ld->src(0).getFile() == FILE_SHADER_INPUT || 187bf215546Sopenharmony_ci ld->src(0).getFile() == FILE_MEMORY_SHARED))); 188bf215546Sopenharmony_ci} 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_civoid 191bf215546Sopenharmony_ciLoadPropagation::checkSwapSrc01(Instruction *insn) 192bf215546Sopenharmony_ci{ 193bf215546Sopenharmony_ci const Target *targ = prog->getTarget(); 194bf215546Sopenharmony_ci if (!targ->getOpInfo(insn).commutative) { 195bf215546Sopenharmony_ci if (insn->op != OP_SET && insn->op != OP_SLCT && 196bf215546Sopenharmony_ci insn->op != OP_SUB && insn->op != OP_XMAD) 197bf215546Sopenharmony_ci return; 198bf215546Sopenharmony_ci // XMAD is only commutative if both the CBCC and MRG flags are not set. 199bf215546Sopenharmony_ci if (insn->op == OP_XMAD && 200bf215546Sopenharmony_ci (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK) == NV50_IR_SUBOP_XMAD_CBCC) 201bf215546Sopenharmony_ci return; 202bf215546Sopenharmony_ci if (insn->op == OP_XMAD && (insn->subOp & NV50_IR_SUBOP_XMAD_MRG)) 203bf215546Sopenharmony_ci return; 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci if (insn->src(1).getFile() != FILE_GPR) 206bf215546Sopenharmony_ci return; 207bf215546Sopenharmony_ci // This is the special OP_SET used for alphatesting, we can't reverse its 208bf215546Sopenharmony_ci // arguments as that will confuse the fixup code. 209bf215546Sopenharmony_ci if (insn->op == OP_SET && insn->subOp) 210bf215546Sopenharmony_ci return; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci Instruction *i0 = insn->getSrc(0)->getInsn(); 213bf215546Sopenharmony_ci Instruction *i1 = insn->getSrc(1)->getInsn(); 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci // Swap sources to inline the less frequently used source. That way, 216bf215546Sopenharmony_ci // optimistically, it will eventually be able to remove the instruction. 217bf215546Sopenharmony_ci int i0refs = insn->getSrc(0)->refCount(); 218bf215546Sopenharmony_ci int i1refs = insn->getSrc(1)->refCount(); 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci if ((isCSpaceLoad(i0) || isImmdLoad(i0)) && targ->insnCanLoad(insn, 1, i0)) { 221bf215546Sopenharmony_ci if ((!isImmdLoad(i1) && !isCSpaceLoad(i1)) || 222bf215546Sopenharmony_ci !targ->insnCanLoad(insn, 1, i1) || 223bf215546Sopenharmony_ci i0refs < i1refs) 224bf215546Sopenharmony_ci insn->swapSources(0, 1); 225bf215546Sopenharmony_ci else 226bf215546Sopenharmony_ci return; 227bf215546Sopenharmony_ci } else 228bf215546Sopenharmony_ci if (isAttribOrSharedLoad(i1)) { 229bf215546Sopenharmony_ci if (!isAttribOrSharedLoad(i0)) 230bf215546Sopenharmony_ci insn->swapSources(0, 1); 231bf215546Sopenharmony_ci else 232bf215546Sopenharmony_ci return; 233bf215546Sopenharmony_ci } else { 234bf215546Sopenharmony_ci return; 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci if (insn->op == OP_SET || insn->op == OP_SET_AND || 238bf215546Sopenharmony_ci insn->op == OP_SET_OR || insn->op == OP_SET_XOR) 239bf215546Sopenharmony_ci insn->asCmp()->setCond = reverseCondCode(insn->asCmp()->setCond); 240bf215546Sopenharmony_ci else 241bf215546Sopenharmony_ci if (insn->op == OP_SLCT) 242bf215546Sopenharmony_ci insn->asCmp()->setCond = inverseCondCode(insn->asCmp()->setCond); 243bf215546Sopenharmony_ci else 244bf215546Sopenharmony_ci if (insn->op == OP_SUB) { 245bf215546Sopenharmony_ci insn->src(0).mod = insn->src(0).mod ^ Modifier(NV50_IR_MOD_NEG); 246bf215546Sopenharmony_ci insn->src(1).mod = insn->src(1).mod ^ Modifier(NV50_IR_MOD_NEG); 247bf215546Sopenharmony_ci } else 248bf215546Sopenharmony_ci if (insn->op == OP_XMAD) { 249bf215546Sopenharmony_ci // swap h1 flags 250bf215546Sopenharmony_ci uint16_t h1 = (insn->subOp >> 1 & NV50_IR_SUBOP_XMAD_H1(0)) | 251bf215546Sopenharmony_ci (insn->subOp << 1 & NV50_IR_SUBOP_XMAD_H1(1)); 252bf215546Sopenharmony_ci insn->subOp = (insn->subOp & ~NV50_IR_SUBOP_XMAD_H1_MASK) | h1; 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci} 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_cibool 257bf215546Sopenharmony_ciLoadPropagation::visit(BasicBlock *bb) 258bf215546Sopenharmony_ci{ 259bf215546Sopenharmony_ci const Target *targ = prog->getTarget(); 260bf215546Sopenharmony_ci Instruction *next; 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci for (Instruction *i = bb->getEntry(); i; i = next) { 263bf215546Sopenharmony_ci next = i->next; 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci if (i->op == OP_CALL) // calls have args as sources, they must be in regs 266bf215546Sopenharmony_ci continue; 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci if (i->op == OP_PFETCH) // pfetch expects arg1 to be a reg 269bf215546Sopenharmony_ci continue; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci if (i->srcExists(1)) 272bf215546Sopenharmony_ci checkSwapSrc01(i); 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci for (int s = 0; i->srcExists(s); ++s) { 275bf215546Sopenharmony_ci Instruction *ld = i->getSrc(s)->getInsn(); 276bf215546Sopenharmony_ci 277bf215546Sopenharmony_ci if (!ld || ld->fixed || (ld->op != OP_LOAD && ld->op != OP_MOV)) 278bf215546Sopenharmony_ci continue; 279bf215546Sopenharmony_ci if (ld->op == OP_LOAD && ld->subOp == NV50_IR_SUBOP_LOAD_LOCKED) 280bf215546Sopenharmony_ci continue; 281bf215546Sopenharmony_ci if (!targ->insnCanLoad(i, s, ld)) 282bf215546Sopenharmony_ci continue; 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci // propagate ! 285bf215546Sopenharmony_ci i->setSrc(s, ld->getSrc(0)); 286bf215546Sopenharmony_ci if (ld->src(0).isIndirect(0)) 287bf215546Sopenharmony_ci i->setIndirect(s, 0, ld->getIndirect(0, 0)); 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci if (ld->getDef(0)->refCount() == 0) 290bf215546Sopenharmony_ci delete_Instruction(prog, ld); 291bf215546Sopenharmony_ci } 292bf215546Sopenharmony_ci } 293bf215546Sopenharmony_ci return true; 294bf215546Sopenharmony_ci} 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci// ============================================================================= 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ciclass IndirectPropagation : public Pass 299bf215546Sopenharmony_ci{ 300bf215546Sopenharmony_ciprivate: 301bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci BuildUtil bld; 304bf215546Sopenharmony_ci}; 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_cibool 307bf215546Sopenharmony_ciIndirectPropagation::visit(BasicBlock *bb) 308bf215546Sopenharmony_ci{ 309bf215546Sopenharmony_ci const Target *targ = prog->getTarget(); 310bf215546Sopenharmony_ci Instruction *next; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci for (Instruction *i = bb->getEntry(); i; i = next) { 313bf215546Sopenharmony_ci next = i->next; 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci bld.setPosition(i, false); 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci for (int s = 0; i->srcExists(s); ++s) { 318bf215546Sopenharmony_ci Instruction *insn; 319bf215546Sopenharmony_ci ImmediateValue imm; 320bf215546Sopenharmony_ci if (!i->src(s).isIndirect(0)) 321bf215546Sopenharmony_ci continue; 322bf215546Sopenharmony_ci insn = i->getIndirect(s, 0)->getInsn(); 323bf215546Sopenharmony_ci if (!insn) 324bf215546Sopenharmony_ci continue; 325bf215546Sopenharmony_ci if (insn->op == OP_ADD && !isFloatType(insn->dType)) { 326bf215546Sopenharmony_ci if (insn->src(0).getFile() != targ->nativeFile(FILE_ADDRESS) || 327bf215546Sopenharmony_ci !insn->src(1).getImmediate(imm) || 328bf215546Sopenharmony_ci !targ->insnCanLoadOffset(i, s, imm.reg.data.s32)) 329bf215546Sopenharmony_ci continue; 330bf215546Sopenharmony_ci i->setIndirect(s, 0, insn->getSrc(0)); 331bf215546Sopenharmony_ci i->setSrc(s, cloneShallow(func, i->getSrc(s))); 332bf215546Sopenharmony_ci i->src(s).get()->reg.data.offset += imm.reg.data.u32; 333bf215546Sopenharmony_ci } else if (insn->op == OP_SUB && !isFloatType(insn->dType)) { 334bf215546Sopenharmony_ci if (insn->src(0).getFile() != targ->nativeFile(FILE_ADDRESS) || 335bf215546Sopenharmony_ci !insn->src(1).getImmediate(imm) || 336bf215546Sopenharmony_ci !targ->insnCanLoadOffset(i, s, -imm.reg.data.s32)) 337bf215546Sopenharmony_ci continue; 338bf215546Sopenharmony_ci i->setIndirect(s, 0, insn->getSrc(0)); 339bf215546Sopenharmony_ci i->setSrc(s, cloneShallow(func, i->getSrc(s))); 340bf215546Sopenharmony_ci i->src(s).get()->reg.data.offset -= imm.reg.data.u32; 341bf215546Sopenharmony_ci } else if (insn->op == OP_MOV) { 342bf215546Sopenharmony_ci if (!insn->src(0).getImmediate(imm) || 343bf215546Sopenharmony_ci !targ->insnCanLoadOffset(i, s, imm.reg.data.s32)) 344bf215546Sopenharmony_ci continue; 345bf215546Sopenharmony_ci i->setIndirect(s, 0, NULL); 346bf215546Sopenharmony_ci i->setSrc(s, cloneShallow(func, i->getSrc(s))); 347bf215546Sopenharmony_ci i->src(s).get()->reg.data.offset += imm.reg.data.u32; 348bf215546Sopenharmony_ci } else if (insn->op == OP_SHLADD) { 349bf215546Sopenharmony_ci if (!insn->src(2).getImmediate(imm) || 350bf215546Sopenharmony_ci !targ->insnCanLoadOffset(i, s, imm.reg.data.s32)) 351bf215546Sopenharmony_ci continue; 352bf215546Sopenharmony_ci i->setIndirect(s, 0, bld.mkOp2v( 353bf215546Sopenharmony_ci OP_SHL, TYPE_U32, bld.getSSA(), insn->getSrc(0), insn->getSrc(1))); 354bf215546Sopenharmony_ci i->setSrc(s, cloneShallow(func, i->getSrc(s))); 355bf215546Sopenharmony_ci i->src(s).get()->reg.data.offset += imm.reg.data.u32; 356bf215546Sopenharmony_ci } 357bf215546Sopenharmony_ci } 358bf215546Sopenharmony_ci } 359bf215546Sopenharmony_ci return true; 360bf215546Sopenharmony_ci} 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_ci// ============================================================================= 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci// Evaluate constant expressions. 365bf215546Sopenharmony_ciclass ConstantFolding : public Pass 366bf215546Sopenharmony_ci{ 367bf215546Sopenharmony_cipublic: 368bf215546Sopenharmony_ci ConstantFolding() : foldCount(0) {} 369bf215546Sopenharmony_ci bool foldAll(Program *); 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ciprivate: 372bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci void expr(Instruction *, ImmediateValue&, ImmediateValue&); 375bf215546Sopenharmony_ci void expr(Instruction *, ImmediateValue&, ImmediateValue&, ImmediateValue&); 376bf215546Sopenharmony_ci /* true if i was deleted */ 377bf215546Sopenharmony_ci bool opnd(Instruction *i, ImmediateValue&, int s); 378bf215546Sopenharmony_ci void opnd3(Instruction *, ImmediateValue&); 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci void unary(Instruction *, const ImmediateValue&); 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci void tryCollapseChainedMULs(Instruction *, const int s, ImmediateValue&); 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci CmpInstruction *findOriginForTestWithZero(Value *); 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_ci bool createMul(DataType ty, Value *def, Value *a, int64_t b, Value *c); 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci unsigned int foldCount; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci BuildUtil bld; 391bf215546Sopenharmony_ci}; 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci// TODO: remember generated immediates and only revisit these 394bf215546Sopenharmony_cibool 395bf215546Sopenharmony_ciConstantFolding::foldAll(Program *prog) 396bf215546Sopenharmony_ci{ 397bf215546Sopenharmony_ci unsigned int iterCount = 0; 398bf215546Sopenharmony_ci do { 399bf215546Sopenharmony_ci foldCount = 0; 400bf215546Sopenharmony_ci if (!run(prog)) 401bf215546Sopenharmony_ci return false; 402bf215546Sopenharmony_ci } while (foldCount && ++iterCount < 2); 403bf215546Sopenharmony_ci return true; 404bf215546Sopenharmony_ci} 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_cibool 407bf215546Sopenharmony_ciConstantFolding::visit(BasicBlock *bb) 408bf215546Sopenharmony_ci{ 409bf215546Sopenharmony_ci Instruction *i, *next; 410bf215546Sopenharmony_ci 411bf215546Sopenharmony_ci for (i = bb->getEntry(); i; i = next) { 412bf215546Sopenharmony_ci next = i->next; 413bf215546Sopenharmony_ci if (i->op == OP_MOV || i->op == OP_CALL) 414bf215546Sopenharmony_ci continue; 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci ImmediateValue src0, src1, src2; 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci if (i->srcExists(2) && 419bf215546Sopenharmony_ci i->src(0).getImmediate(src0) && 420bf215546Sopenharmony_ci i->src(1).getImmediate(src1) && 421bf215546Sopenharmony_ci i->src(2).getImmediate(src2)) { 422bf215546Sopenharmony_ci expr(i, src0, src1, src2); 423bf215546Sopenharmony_ci } else 424bf215546Sopenharmony_ci if (i->srcExists(1) && 425bf215546Sopenharmony_ci i->src(0).getImmediate(src0) && i->src(1).getImmediate(src1)) { 426bf215546Sopenharmony_ci expr(i, src0, src1); 427bf215546Sopenharmony_ci } else 428bf215546Sopenharmony_ci if (i->srcExists(0) && i->src(0).getImmediate(src0)) { 429bf215546Sopenharmony_ci if (opnd(i, src0, 0)) 430bf215546Sopenharmony_ci continue; 431bf215546Sopenharmony_ci } else 432bf215546Sopenharmony_ci if (i->srcExists(1) && i->src(1).getImmediate(src1)) { 433bf215546Sopenharmony_ci if (opnd(i, src1, 1)) 434bf215546Sopenharmony_ci continue; 435bf215546Sopenharmony_ci } 436bf215546Sopenharmony_ci if (i->srcExists(2) && i->src(2).getImmediate(src2)) 437bf215546Sopenharmony_ci opnd3(i, src2); 438bf215546Sopenharmony_ci } 439bf215546Sopenharmony_ci return true; 440bf215546Sopenharmony_ci} 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ciCmpInstruction * 443bf215546Sopenharmony_ciConstantFolding::findOriginForTestWithZero(Value *value) 444bf215546Sopenharmony_ci{ 445bf215546Sopenharmony_ci if (!value) 446bf215546Sopenharmony_ci return NULL; 447bf215546Sopenharmony_ci Instruction *insn = value->getInsn(); 448bf215546Sopenharmony_ci if (!insn) 449bf215546Sopenharmony_ci return NULL; 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci if (insn->asCmp() && insn->op != OP_SLCT) 452bf215546Sopenharmony_ci return insn->asCmp(); 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci /* Sometimes mov's will sneak in as a result of other folding. This gets 455bf215546Sopenharmony_ci * cleaned up later. 456bf215546Sopenharmony_ci */ 457bf215546Sopenharmony_ci if (insn->op == OP_MOV) 458bf215546Sopenharmony_ci return findOriginForTestWithZero(insn->getSrc(0)); 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci /* Deal with AND 1.0 here since nv50 can't fold into boolean float */ 461bf215546Sopenharmony_ci if (insn->op == OP_AND) { 462bf215546Sopenharmony_ci int s = 0; 463bf215546Sopenharmony_ci ImmediateValue imm; 464bf215546Sopenharmony_ci if (!insn->src(s).getImmediate(imm)) { 465bf215546Sopenharmony_ci s = 1; 466bf215546Sopenharmony_ci if (!insn->src(s).getImmediate(imm)) 467bf215546Sopenharmony_ci return NULL; 468bf215546Sopenharmony_ci } 469bf215546Sopenharmony_ci if (imm.reg.data.f32 != 1.0f) 470bf215546Sopenharmony_ci return NULL; 471bf215546Sopenharmony_ci /* TODO: Come up with a way to handle the condition being inverted */ 472bf215546Sopenharmony_ci if (insn->src(!s).mod != Modifier(0)) 473bf215546Sopenharmony_ci return NULL; 474bf215546Sopenharmony_ci return findOriginForTestWithZero(insn->getSrc(!s)); 475bf215546Sopenharmony_ci } 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci return NULL; 478bf215546Sopenharmony_ci} 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_civoid 481bf215546Sopenharmony_ciModifier::applyTo(ImmediateValue& imm) const 482bf215546Sopenharmony_ci{ 483bf215546Sopenharmony_ci if (!bits) // avoid failure if imm.reg.type is unhandled (e.g. b128) 484bf215546Sopenharmony_ci return; 485bf215546Sopenharmony_ci switch (imm.reg.type) { 486bf215546Sopenharmony_ci case TYPE_F32: 487bf215546Sopenharmony_ci if (bits & NV50_IR_MOD_ABS) 488bf215546Sopenharmony_ci imm.reg.data.f32 = fabsf(imm.reg.data.f32); 489bf215546Sopenharmony_ci if (bits & NV50_IR_MOD_NEG) 490bf215546Sopenharmony_ci imm.reg.data.f32 = -imm.reg.data.f32; 491bf215546Sopenharmony_ci if (bits & NV50_IR_MOD_SAT) { 492bf215546Sopenharmony_ci if (imm.reg.data.f32 < 0.0f) 493bf215546Sopenharmony_ci imm.reg.data.f32 = 0.0f; 494bf215546Sopenharmony_ci else 495bf215546Sopenharmony_ci if (imm.reg.data.f32 > 1.0f) 496bf215546Sopenharmony_ci imm.reg.data.f32 = 1.0f; 497bf215546Sopenharmony_ci } 498bf215546Sopenharmony_ci assert(!(bits & NV50_IR_MOD_NOT)); 499bf215546Sopenharmony_ci break; 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci case TYPE_S8: // NOTE: will be extended 502bf215546Sopenharmony_ci case TYPE_S16: 503bf215546Sopenharmony_ci case TYPE_S32: 504bf215546Sopenharmony_ci case TYPE_U8: // NOTE: treated as signed 505bf215546Sopenharmony_ci case TYPE_U16: 506bf215546Sopenharmony_ci case TYPE_U32: 507bf215546Sopenharmony_ci if (bits & NV50_IR_MOD_ABS) 508bf215546Sopenharmony_ci imm.reg.data.s32 = (imm.reg.data.s32 >= 0) ? 509bf215546Sopenharmony_ci imm.reg.data.s32 : -imm.reg.data.s32; 510bf215546Sopenharmony_ci if (bits & NV50_IR_MOD_NEG) 511bf215546Sopenharmony_ci imm.reg.data.s32 = -imm.reg.data.s32; 512bf215546Sopenharmony_ci if (bits & NV50_IR_MOD_NOT) 513bf215546Sopenharmony_ci imm.reg.data.s32 = ~imm.reg.data.s32; 514bf215546Sopenharmony_ci break; 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_ci case TYPE_F64: 517bf215546Sopenharmony_ci if (bits & NV50_IR_MOD_ABS) 518bf215546Sopenharmony_ci imm.reg.data.f64 = fabs(imm.reg.data.f64); 519bf215546Sopenharmony_ci if (bits & NV50_IR_MOD_NEG) 520bf215546Sopenharmony_ci imm.reg.data.f64 = -imm.reg.data.f64; 521bf215546Sopenharmony_ci if (bits & NV50_IR_MOD_SAT) { 522bf215546Sopenharmony_ci if (imm.reg.data.f64 < 0.0) 523bf215546Sopenharmony_ci imm.reg.data.f64 = 0.0; 524bf215546Sopenharmony_ci else 525bf215546Sopenharmony_ci if (imm.reg.data.f64 > 1.0) 526bf215546Sopenharmony_ci imm.reg.data.f64 = 1.0; 527bf215546Sopenharmony_ci } 528bf215546Sopenharmony_ci assert(!(bits & NV50_IR_MOD_NOT)); 529bf215546Sopenharmony_ci break; 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci default: 532bf215546Sopenharmony_ci assert(!"invalid/unhandled type"); 533bf215546Sopenharmony_ci imm.reg.data.u64 = 0; 534bf215546Sopenharmony_ci break; 535bf215546Sopenharmony_ci } 536bf215546Sopenharmony_ci} 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_cioperation 539bf215546Sopenharmony_ciModifier::getOp() const 540bf215546Sopenharmony_ci{ 541bf215546Sopenharmony_ci switch (bits) { 542bf215546Sopenharmony_ci case NV50_IR_MOD_ABS: return OP_ABS; 543bf215546Sopenharmony_ci case NV50_IR_MOD_NEG: return OP_NEG; 544bf215546Sopenharmony_ci case NV50_IR_MOD_SAT: return OP_SAT; 545bf215546Sopenharmony_ci case NV50_IR_MOD_NOT: return OP_NOT; 546bf215546Sopenharmony_ci case 0: 547bf215546Sopenharmony_ci return OP_MOV; 548bf215546Sopenharmony_ci default: 549bf215546Sopenharmony_ci return OP_CVT; 550bf215546Sopenharmony_ci } 551bf215546Sopenharmony_ci} 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_civoid 554bf215546Sopenharmony_ciConstantFolding::expr(Instruction *i, 555bf215546Sopenharmony_ci ImmediateValue &imm0, ImmediateValue &imm1) 556bf215546Sopenharmony_ci{ 557bf215546Sopenharmony_ci struct Storage *const a = &imm0.reg, *const b = &imm1.reg; 558bf215546Sopenharmony_ci struct Storage res; 559bf215546Sopenharmony_ci DataType type = i->dType; 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci memset(&res.data, 0, sizeof(res.data)); 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci switch (i->op) { 564bf215546Sopenharmony_ci case OP_SGXT: { 565bf215546Sopenharmony_ci int bits = b->data.u32; 566bf215546Sopenharmony_ci if (bits) { 567bf215546Sopenharmony_ci uint32_t data = a->data.u32 & (0xffffffff >> (32 - bits)); 568bf215546Sopenharmony_ci if (bits < 32 && (data & (1 << (bits - 1)))) 569bf215546Sopenharmony_ci data = data - (1 << bits); 570bf215546Sopenharmony_ci res.data.u32 = data; 571bf215546Sopenharmony_ci } 572bf215546Sopenharmony_ci break; 573bf215546Sopenharmony_ci } 574bf215546Sopenharmony_ci case OP_BMSK: 575bf215546Sopenharmony_ci res.data.u32 = ((1 << b->data.u32) - 1) << a->data.u32; 576bf215546Sopenharmony_ci break; 577bf215546Sopenharmony_ci case OP_MAD: 578bf215546Sopenharmony_ci case OP_FMA: 579bf215546Sopenharmony_ci case OP_MUL: 580bf215546Sopenharmony_ci if (i->dnz && i->dType == TYPE_F32) { 581bf215546Sopenharmony_ci if (!isfinite(a->data.f32)) 582bf215546Sopenharmony_ci a->data.f32 = 0.0f; 583bf215546Sopenharmony_ci if (!isfinite(b->data.f32)) 584bf215546Sopenharmony_ci b->data.f32 = 0.0f; 585bf215546Sopenharmony_ci } 586bf215546Sopenharmony_ci switch (i->dType) { 587bf215546Sopenharmony_ci case TYPE_F32: 588bf215546Sopenharmony_ci res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor); 589bf215546Sopenharmony_ci break; 590bf215546Sopenharmony_ci case TYPE_F64: res.data.f64 = a->data.f64 * b->data.f64; break; 591bf215546Sopenharmony_ci case TYPE_S32: 592bf215546Sopenharmony_ci if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { 593bf215546Sopenharmony_ci res.data.s32 = ((int64_t)a->data.s32 * b->data.s32) >> 32; 594bf215546Sopenharmony_ci break; 595bf215546Sopenharmony_ci } 596bf215546Sopenharmony_ci FALLTHROUGH; 597bf215546Sopenharmony_ci case TYPE_U32: 598bf215546Sopenharmony_ci if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { 599bf215546Sopenharmony_ci res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32) >> 32; 600bf215546Sopenharmony_ci break; 601bf215546Sopenharmony_ci } 602bf215546Sopenharmony_ci res.data.u32 = a->data.u32 * b->data.u32; break; 603bf215546Sopenharmony_ci default: 604bf215546Sopenharmony_ci return; 605bf215546Sopenharmony_ci } 606bf215546Sopenharmony_ci break; 607bf215546Sopenharmony_ci case OP_DIV: 608bf215546Sopenharmony_ci if (b->data.u32 == 0) 609bf215546Sopenharmony_ci break; 610bf215546Sopenharmony_ci switch (i->dType) { 611bf215546Sopenharmony_ci case TYPE_F32: res.data.f32 = a->data.f32 / b->data.f32; break; 612bf215546Sopenharmony_ci case TYPE_F64: res.data.f64 = a->data.f64 / b->data.f64; break; 613bf215546Sopenharmony_ci case TYPE_S32: res.data.s32 = a->data.s32 / b->data.s32; break; 614bf215546Sopenharmony_ci case TYPE_U32: res.data.u32 = a->data.u32 / b->data.u32; break; 615bf215546Sopenharmony_ci default: 616bf215546Sopenharmony_ci return; 617bf215546Sopenharmony_ci } 618bf215546Sopenharmony_ci break; 619bf215546Sopenharmony_ci case OP_ADD: 620bf215546Sopenharmony_ci switch (i->dType) { 621bf215546Sopenharmony_ci case TYPE_F32: res.data.f32 = a->data.f32 + b->data.f32; break; 622bf215546Sopenharmony_ci case TYPE_F64: res.data.f64 = a->data.f64 + b->data.f64; break; 623bf215546Sopenharmony_ci case TYPE_S32: 624bf215546Sopenharmony_ci case TYPE_U32: res.data.u32 = a->data.u32 + b->data.u32; break; 625bf215546Sopenharmony_ci default: 626bf215546Sopenharmony_ci return; 627bf215546Sopenharmony_ci } 628bf215546Sopenharmony_ci break; 629bf215546Sopenharmony_ci case OP_SUB: 630bf215546Sopenharmony_ci switch (i->dType) { 631bf215546Sopenharmony_ci case TYPE_F32: res.data.f32 = a->data.f32 - b->data.f32; break; 632bf215546Sopenharmony_ci case TYPE_F64: res.data.f64 = a->data.f64 - b->data.f64; break; 633bf215546Sopenharmony_ci case TYPE_S32: 634bf215546Sopenharmony_ci case TYPE_U32: res.data.u32 = a->data.u32 - b->data.u32; break; 635bf215546Sopenharmony_ci default: 636bf215546Sopenharmony_ci return; 637bf215546Sopenharmony_ci } 638bf215546Sopenharmony_ci break; 639bf215546Sopenharmony_ci case OP_POW: 640bf215546Sopenharmony_ci switch (i->dType) { 641bf215546Sopenharmony_ci case TYPE_F32: res.data.f32 = pow(a->data.f32, b->data.f32); break; 642bf215546Sopenharmony_ci case TYPE_F64: res.data.f64 = pow(a->data.f64, b->data.f64); break; 643bf215546Sopenharmony_ci default: 644bf215546Sopenharmony_ci return; 645bf215546Sopenharmony_ci } 646bf215546Sopenharmony_ci break; 647bf215546Sopenharmony_ci case OP_MAX: 648bf215546Sopenharmony_ci switch (i->dType) { 649bf215546Sopenharmony_ci case TYPE_F32: res.data.f32 = MAX2(a->data.f32, b->data.f32); break; 650bf215546Sopenharmony_ci case TYPE_F64: res.data.f64 = MAX2(a->data.f64, b->data.f64); break; 651bf215546Sopenharmony_ci case TYPE_S32: res.data.s32 = MAX2(a->data.s32, b->data.s32); break; 652bf215546Sopenharmony_ci case TYPE_U32: res.data.u32 = MAX2(a->data.u32, b->data.u32); break; 653bf215546Sopenharmony_ci default: 654bf215546Sopenharmony_ci return; 655bf215546Sopenharmony_ci } 656bf215546Sopenharmony_ci break; 657bf215546Sopenharmony_ci case OP_MIN: 658bf215546Sopenharmony_ci switch (i->dType) { 659bf215546Sopenharmony_ci case TYPE_F32: res.data.f32 = MIN2(a->data.f32, b->data.f32); break; 660bf215546Sopenharmony_ci case TYPE_F64: res.data.f64 = MIN2(a->data.f64, b->data.f64); break; 661bf215546Sopenharmony_ci case TYPE_S32: res.data.s32 = MIN2(a->data.s32, b->data.s32); break; 662bf215546Sopenharmony_ci case TYPE_U32: res.data.u32 = MIN2(a->data.u32, b->data.u32); break; 663bf215546Sopenharmony_ci default: 664bf215546Sopenharmony_ci return; 665bf215546Sopenharmony_ci } 666bf215546Sopenharmony_ci break; 667bf215546Sopenharmony_ci case OP_AND: 668bf215546Sopenharmony_ci res.data.u64 = a->data.u64 & b->data.u64; 669bf215546Sopenharmony_ci break; 670bf215546Sopenharmony_ci case OP_OR: 671bf215546Sopenharmony_ci res.data.u64 = a->data.u64 | b->data.u64; 672bf215546Sopenharmony_ci break; 673bf215546Sopenharmony_ci case OP_XOR: 674bf215546Sopenharmony_ci res.data.u64 = a->data.u64 ^ b->data.u64; 675bf215546Sopenharmony_ci break; 676bf215546Sopenharmony_ci case OP_SHL: 677bf215546Sopenharmony_ci res.data.u32 = a->data.u32 << b->data.u32; 678bf215546Sopenharmony_ci break; 679bf215546Sopenharmony_ci case OP_SHR: 680bf215546Sopenharmony_ci switch (i->dType) { 681bf215546Sopenharmony_ci case TYPE_S32: res.data.s32 = a->data.s32 >> b->data.u32; break; 682bf215546Sopenharmony_ci case TYPE_U32: res.data.u32 = a->data.u32 >> b->data.u32; break; 683bf215546Sopenharmony_ci default: 684bf215546Sopenharmony_ci return; 685bf215546Sopenharmony_ci } 686bf215546Sopenharmony_ci break; 687bf215546Sopenharmony_ci case OP_SLCT: 688bf215546Sopenharmony_ci if (a->data.u32 != b->data.u32) 689bf215546Sopenharmony_ci return; 690bf215546Sopenharmony_ci res.data.u32 = a->data.u32; 691bf215546Sopenharmony_ci break; 692bf215546Sopenharmony_ci case OP_EXTBF: { 693bf215546Sopenharmony_ci int offset = b->data.u32 & 0xff; 694bf215546Sopenharmony_ci int width = (b->data.u32 >> 8) & 0xff; 695bf215546Sopenharmony_ci int rshift = offset; 696bf215546Sopenharmony_ci int lshift = 0; 697bf215546Sopenharmony_ci if (width == 0) { 698bf215546Sopenharmony_ci res.data.u32 = 0; 699bf215546Sopenharmony_ci break; 700bf215546Sopenharmony_ci } 701bf215546Sopenharmony_ci if (width + offset < 32) { 702bf215546Sopenharmony_ci rshift = 32 - width; 703bf215546Sopenharmony_ci lshift = 32 - width - offset; 704bf215546Sopenharmony_ci } 705bf215546Sopenharmony_ci if (i->subOp == NV50_IR_SUBOP_EXTBF_REV) 706bf215546Sopenharmony_ci res.data.u32 = util_bitreverse(a->data.u32); 707bf215546Sopenharmony_ci else 708bf215546Sopenharmony_ci res.data.u32 = a->data.u32; 709bf215546Sopenharmony_ci switch (i->dType) { 710bf215546Sopenharmony_ci case TYPE_S32: res.data.s32 = (res.data.s32 << lshift) >> rshift; break; 711bf215546Sopenharmony_ci case TYPE_U32: res.data.u32 = (res.data.u32 << lshift) >> rshift; break; 712bf215546Sopenharmony_ci default: 713bf215546Sopenharmony_ci return; 714bf215546Sopenharmony_ci } 715bf215546Sopenharmony_ci break; 716bf215546Sopenharmony_ci } 717bf215546Sopenharmony_ci case OP_POPCNT: 718bf215546Sopenharmony_ci res.data.u32 = util_bitcount(a->data.u32 & b->data.u32); 719bf215546Sopenharmony_ci break; 720bf215546Sopenharmony_ci case OP_PFETCH: 721bf215546Sopenharmony_ci // The two arguments to pfetch are logically added together. Normally 722bf215546Sopenharmony_ci // the second argument will not be constant, but that can happen. 723bf215546Sopenharmony_ci res.data.u32 = a->data.u32 + b->data.u32; 724bf215546Sopenharmony_ci type = TYPE_U32; 725bf215546Sopenharmony_ci break; 726bf215546Sopenharmony_ci case OP_MERGE: 727bf215546Sopenharmony_ci switch (i->dType) { 728bf215546Sopenharmony_ci case TYPE_U64: 729bf215546Sopenharmony_ci case TYPE_S64: 730bf215546Sopenharmony_ci case TYPE_F64: 731bf215546Sopenharmony_ci res.data.u64 = (((uint64_t)b->data.u32) << 32) | a->data.u32; 732bf215546Sopenharmony_ci break; 733bf215546Sopenharmony_ci default: 734bf215546Sopenharmony_ci return; 735bf215546Sopenharmony_ci } 736bf215546Sopenharmony_ci break; 737bf215546Sopenharmony_ci default: 738bf215546Sopenharmony_ci return; 739bf215546Sopenharmony_ci } 740bf215546Sopenharmony_ci ++foldCount; 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci i->src(0).mod = Modifier(0); 743bf215546Sopenharmony_ci i->src(1).mod = Modifier(0); 744bf215546Sopenharmony_ci i->postFactor = 0; 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); 747bf215546Sopenharmony_ci i->setSrc(1, NULL); 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci i->getSrc(0)->reg.data = res.data; 750bf215546Sopenharmony_ci i->getSrc(0)->reg.type = type; 751bf215546Sopenharmony_ci i->getSrc(0)->reg.size = typeSizeof(type); 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_ci switch (i->op) { 754bf215546Sopenharmony_ci case OP_MAD: 755bf215546Sopenharmony_ci case OP_FMA: { 756bf215546Sopenharmony_ci ImmediateValue src0, src1 = *i->getSrc(0)->asImm(); 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci // Move the immediate into position 1, where we know it might be 759bf215546Sopenharmony_ci // emittable. However it might not be anyways, as there may be other 760bf215546Sopenharmony_ci // restrictions, so move it into a separate LValue. 761bf215546Sopenharmony_ci bld.setPosition(i, false); 762bf215546Sopenharmony_ci i->op = OP_ADD; 763bf215546Sopenharmony_ci i->dnz = 0; 764bf215546Sopenharmony_ci i->setSrc(1, bld.mkMov(bld.getSSA(type), i->getSrc(0), type)->getDef(0)); 765bf215546Sopenharmony_ci i->setSrc(0, i->getSrc(2)); 766bf215546Sopenharmony_ci i->src(0).mod = i->src(2).mod; 767bf215546Sopenharmony_ci i->setSrc(2, NULL); 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci if (i->src(0).getImmediate(src0)) 770bf215546Sopenharmony_ci expr(i, src0, src1); 771bf215546Sopenharmony_ci else 772bf215546Sopenharmony_ci opnd(i, src1, 1); 773bf215546Sopenharmony_ci break; 774bf215546Sopenharmony_ci } 775bf215546Sopenharmony_ci case OP_PFETCH: 776bf215546Sopenharmony_ci // Leave PFETCH alone... we just folded its 2 args into 1. 777bf215546Sopenharmony_ci break; 778bf215546Sopenharmony_ci default: 779bf215546Sopenharmony_ci i->op = i->saturate ? OP_SAT : OP_MOV; 780bf215546Sopenharmony_ci if (i->saturate) 781bf215546Sopenharmony_ci unary(i, *i->getSrc(0)->asImm()); 782bf215546Sopenharmony_ci break; 783bf215546Sopenharmony_ci } 784bf215546Sopenharmony_ci i->subOp = 0; 785bf215546Sopenharmony_ci} 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_civoid 788bf215546Sopenharmony_ciConstantFolding::expr(Instruction *i, 789bf215546Sopenharmony_ci ImmediateValue &imm0, 790bf215546Sopenharmony_ci ImmediateValue &imm1, 791bf215546Sopenharmony_ci ImmediateValue &imm2) 792bf215546Sopenharmony_ci{ 793bf215546Sopenharmony_ci struct Storage *const a = &imm0.reg, *const b = &imm1.reg, *const c = &imm2.reg; 794bf215546Sopenharmony_ci struct Storage res; 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_ci memset(&res.data, 0, sizeof(res.data)); 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci switch (i->op) { 799bf215546Sopenharmony_ci case OP_LOP3_LUT: 800bf215546Sopenharmony_ci for (int n = 0; n < 32; n++) { 801bf215546Sopenharmony_ci uint8_t lut = ((a->data.u32 >> n) & 1) << 2 | 802bf215546Sopenharmony_ci ((b->data.u32 >> n) & 1) << 1 | 803bf215546Sopenharmony_ci ((c->data.u32 >> n) & 1); 804bf215546Sopenharmony_ci res.data.u32 |= !!(i->subOp & (1 << lut)) << n; 805bf215546Sopenharmony_ci } 806bf215546Sopenharmony_ci break; 807bf215546Sopenharmony_ci case OP_PERMT: 808bf215546Sopenharmony_ci if (!i->subOp) { 809bf215546Sopenharmony_ci uint64_t input = (uint64_t)c->data.u32 << 32 | a->data.u32; 810bf215546Sopenharmony_ci uint16_t permt = b->data.u32; 811bf215546Sopenharmony_ci for (int n = 0 ; n < 4; n++, permt >>= 4) 812bf215546Sopenharmony_ci res.data.u32 |= ((input >> ((permt & 0xf) * 8)) & 0xff) << n * 8; 813bf215546Sopenharmony_ci } else 814bf215546Sopenharmony_ci return; 815bf215546Sopenharmony_ci break; 816bf215546Sopenharmony_ci case OP_INSBF: { 817bf215546Sopenharmony_ci int offset = b->data.u32 & 0xff; 818bf215546Sopenharmony_ci int width = (b->data.u32 >> 8) & 0xff; 819bf215546Sopenharmony_ci unsigned bitmask = ((1 << width) - 1) << offset; 820bf215546Sopenharmony_ci res.data.u32 = ((a->data.u32 << offset) & bitmask) | (c->data.u32 & ~bitmask); 821bf215546Sopenharmony_ci break; 822bf215546Sopenharmony_ci } 823bf215546Sopenharmony_ci case OP_MAD: 824bf215546Sopenharmony_ci case OP_FMA: { 825bf215546Sopenharmony_ci switch (i->dType) { 826bf215546Sopenharmony_ci case TYPE_F32: 827bf215546Sopenharmony_ci res.data.f32 = a->data.f32 * b->data.f32 * exp2f(i->postFactor) + 828bf215546Sopenharmony_ci c->data.f32; 829bf215546Sopenharmony_ci break; 830bf215546Sopenharmony_ci case TYPE_F64: 831bf215546Sopenharmony_ci res.data.f64 = a->data.f64 * b->data.f64 + c->data.f64; 832bf215546Sopenharmony_ci break; 833bf215546Sopenharmony_ci case TYPE_S32: 834bf215546Sopenharmony_ci if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { 835bf215546Sopenharmony_ci res.data.s32 = ((int64_t)a->data.s32 * b->data.s32 >> 32) + c->data.s32; 836bf215546Sopenharmony_ci break; 837bf215546Sopenharmony_ci } 838bf215546Sopenharmony_ci FALLTHROUGH; 839bf215546Sopenharmony_ci case TYPE_U32: 840bf215546Sopenharmony_ci if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { 841bf215546Sopenharmony_ci res.data.u32 = ((uint64_t)a->data.u32 * b->data.u32 >> 32) + c->data.u32; 842bf215546Sopenharmony_ci break; 843bf215546Sopenharmony_ci } 844bf215546Sopenharmony_ci res.data.u32 = a->data.u32 * b->data.u32 + c->data.u32; 845bf215546Sopenharmony_ci break; 846bf215546Sopenharmony_ci default: 847bf215546Sopenharmony_ci return; 848bf215546Sopenharmony_ci } 849bf215546Sopenharmony_ci break; 850bf215546Sopenharmony_ci } 851bf215546Sopenharmony_ci case OP_SHLADD: 852bf215546Sopenharmony_ci res.data.u32 = (a->data.u32 << b->data.u32) + c->data.u32; 853bf215546Sopenharmony_ci break; 854bf215546Sopenharmony_ci default: 855bf215546Sopenharmony_ci return; 856bf215546Sopenharmony_ci } 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci ++foldCount; 859bf215546Sopenharmony_ci i->src(0).mod = Modifier(0); 860bf215546Sopenharmony_ci i->src(1).mod = Modifier(0); 861bf215546Sopenharmony_ci i->src(2).mod = Modifier(0); 862bf215546Sopenharmony_ci 863bf215546Sopenharmony_ci i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.u32)); 864bf215546Sopenharmony_ci i->setSrc(1, NULL); 865bf215546Sopenharmony_ci i->setSrc(2, NULL); 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci i->getSrc(0)->reg.data = res.data; 868bf215546Sopenharmony_ci i->getSrc(0)->reg.type = i->dType; 869bf215546Sopenharmony_ci i->getSrc(0)->reg.size = typeSizeof(i->dType); 870bf215546Sopenharmony_ci 871bf215546Sopenharmony_ci i->op = OP_MOV; 872bf215546Sopenharmony_ci} 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_civoid 875bf215546Sopenharmony_ciConstantFolding::unary(Instruction *i, const ImmediateValue &imm) 876bf215546Sopenharmony_ci{ 877bf215546Sopenharmony_ci Storage res; 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci if (i->dType != TYPE_F32) 880bf215546Sopenharmony_ci return; 881bf215546Sopenharmony_ci switch (i->op) { 882bf215546Sopenharmony_ci case OP_NEG: res.data.f32 = -imm.reg.data.f32; break; 883bf215546Sopenharmony_ci case OP_ABS: res.data.f32 = fabsf(imm.reg.data.f32); break; 884bf215546Sopenharmony_ci case OP_SAT: res.data.f32 = SATURATE(imm.reg.data.f32); break; 885bf215546Sopenharmony_ci case OP_RCP: res.data.f32 = 1.0f / imm.reg.data.f32; break; 886bf215546Sopenharmony_ci case OP_RSQ: res.data.f32 = 1.0f / sqrtf(imm.reg.data.f32); break; 887bf215546Sopenharmony_ci case OP_LG2: res.data.f32 = log2f(imm.reg.data.f32); break; 888bf215546Sopenharmony_ci case OP_EX2: res.data.f32 = exp2f(imm.reg.data.f32); break; 889bf215546Sopenharmony_ci case OP_SIN: res.data.f32 = sinf(imm.reg.data.f32); break; 890bf215546Sopenharmony_ci case OP_COS: res.data.f32 = cosf(imm.reg.data.f32); break; 891bf215546Sopenharmony_ci case OP_SQRT: res.data.f32 = sqrtf(imm.reg.data.f32); break; 892bf215546Sopenharmony_ci case OP_PRESIN: 893bf215546Sopenharmony_ci case OP_PREEX2: 894bf215546Sopenharmony_ci // these should be handled in subsequent OP_SIN/COS/EX2 895bf215546Sopenharmony_ci res.data.f32 = imm.reg.data.f32; 896bf215546Sopenharmony_ci break; 897bf215546Sopenharmony_ci default: 898bf215546Sopenharmony_ci return; 899bf215546Sopenharmony_ci } 900bf215546Sopenharmony_ci i->op = OP_MOV; 901bf215546Sopenharmony_ci i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res.data.f32)); 902bf215546Sopenharmony_ci i->src(0).mod = Modifier(0); 903bf215546Sopenharmony_ci} 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_civoid 906bf215546Sopenharmony_ciConstantFolding::tryCollapseChainedMULs(Instruction *mul2, 907bf215546Sopenharmony_ci const int s, ImmediateValue& imm2) 908bf215546Sopenharmony_ci{ 909bf215546Sopenharmony_ci const int t = s ? 0 : 1; 910bf215546Sopenharmony_ci Instruction *insn; 911bf215546Sopenharmony_ci Instruction *mul1 = NULL; // mul1 before mul2 912bf215546Sopenharmony_ci int e = 0; 913bf215546Sopenharmony_ci float f = imm2.reg.data.f32 * exp2f(mul2->postFactor); 914bf215546Sopenharmony_ci ImmediateValue imm1; 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci assert(mul2->op == OP_MUL && mul2->dType == TYPE_F32); 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci if (mul2->getSrc(t)->refCount() == 1) { 919bf215546Sopenharmony_ci insn = mul2->getSrc(t)->getInsn(); 920bf215546Sopenharmony_ci if (!mul2->src(t).mod && insn->op == OP_MUL && insn->dType == TYPE_F32) 921bf215546Sopenharmony_ci mul1 = insn; 922bf215546Sopenharmony_ci if (mul1 && !mul1->saturate) { 923bf215546Sopenharmony_ci int s1; 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_ci if (mul1->src(s1 = 0).getImmediate(imm1) || 926bf215546Sopenharmony_ci mul1->src(s1 = 1).getImmediate(imm1)) { 927bf215546Sopenharmony_ci bld.setPosition(mul1, false); 928bf215546Sopenharmony_ci // a = mul r, imm1 929bf215546Sopenharmony_ci // d = mul a, imm2 -> d = mul r, (imm1 * imm2) 930bf215546Sopenharmony_ci mul1->setSrc(s1, bld.loadImm(NULL, f * imm1.reg.data.f32)); 931bf215546Sopenharmony_ci mul1->src(s1).mod = Modifier(0); 932bf215546Sopenharmony_ci mul2->def(0).replace(mul1->getDef(0), false); 933bf215546Sopenharmony_ci mul1->saturate = mul2->saturate; 934bf215546Sopenharmony_ci } else 935bf215546Sopenharmony_ci if (prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { 936bf215546Sopenharmony_ci // c = mul a, b 937bf215546Sopenharmony_ci // d = mul c, imm -> d = mul_x_imm a, b 938bf215546Sopenharmony_ci mul1->postFactor = e; 939bf215546Sopenharmony_ci mul2->def(0).replace(mul1->getDef(0), false); 940bf215546Sopenharmony_ci if (f < 0) 941bf215546Sopenharmony_ci mul1->src(0).mod *= Modifier(NV50_IR_MOD_NEG); 942bf215546Sopenharmony_ci mul1->saturate = mul2->saturate; 943bf215546Sopenharmony_ci } 944bf215546Sopenharmony_ci return; 945bf215546Sopenharmony_ci } 946bf215546Sopenharmony_ci } 947bf215546Sopenharmony_ci if (mul2->getDef(0)->refCount() == 1 && !mul2->saturate) { 948bf215546Sopenharmony_ci // b = mul a, imm 949bf215546Sopenharmony_ci // d = mul b, c -> d = mul_x_imm a, c 950bf215546Sopenharmony_ci int s2, t2; 951bf215546Sopenharmony_ci insn = (*mul2->getDef(0)->uses.begin())->getInsn(); 952bf215546Sopenharmony_ci if (!insn) 953bf215546Sopenharmony_ci return; 954bf215546Sopenharmony_ci mul1 = mul2; 955bf215546Sopenharmony_ci mul2 = NULL; 956bf215546Sopenharmony_ci s2 = insn->getSrc(0) == mul1->getDef(0) ? 0 : 1; 957bf215546Sopenharmony_ci t2 = s2 ? 0 : 1; 958bf215546Sopenharmony_ci if (insn->op == OP_MUL && insn->dType == TYPE_F32) 959bf215546Sopenharmony_ci if (!insn->src(s2).mod && !insn->src(t2).getImmediate(imm1)) 960bf215546Sopenharmony_ci mul2 = insn; 961bf215546Sopenharmony_ci if (mul2 && prog->getTarget()->isPostMultiplySupported(OP_MUL, f, e)) { 962bf215546Sopenharmony_ci mul2->postFactor = e; 963bf215546Sopenharmony_ci mul2->setSrc(s2, mul1->src(t)); 964bf215546Sopenharmony_ci if (f < 0) 965bf215546Sopenharmony_ci mul2->src(s2).mod *= Modifier(NV50_IR_MOD_NEG); 966bf215546Sopenharmony_ci } 967bf215546Sopenharmony_ci } 968bf215546Sopenharmony_ci} 969bf215546Sopenharmony_ci 970bf215546Sopenharmony_civoid 971bf215546Sopenharmony_ciConstantFolding::opnd3(Instruction *i, ImmediateValue &imm2) 972bf215546Sopenharmony_ci{ 973bf215546Sopenharmony_ci switch (i->op) { 974bf215546Sopenharmony_ci case OP_MAD: 975bf215546Sopenharmony_ci case OP_FMA: 976bf215546Sopenharmony_ci if (imm2.isInteger(0)) { 977bf215546Sopenharmony_ci i->op = OP_MUL; 978bf215546Sopenharmony_ci i->setSrc(2, NULL); 979bf215546Sopenharmony_ci foldCount++; 980bf215546Sopenharmony_ci return; 981bf215546Sopenharmony_ci } 982bf215546Sopenharmony_ci break; 983bf215546Sopenharmony_ci case OP_SHLADD: 984bf215546Sopenharmony_ci if (imm2.isInteger(0)) { 985bf215546Sopenharmony_ci i->op = OP_SHL; 986bf215546Sopenharmony_ci i->setSrc(2, NULL); 987bf215546Sopenharmony_ci foldCount++; 988bf215546Sopenharmony_ci return; 989bf215546Sopenharmony_ci } 990bf215546Sopenharmony_ci break; 991bf215546Sopenharmony_ci default: 992bf215546Sopenharmony_ci return; 993bf215546Sopenharmony_ci } 994bf215546Sopenharmony_ci} 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_cibool 997bf215546Sopenharmony_ciConstantFolding::createMul(DataType ty, Value *def, Value *a, int64_t b, Value *c) 998bf215546Sopenharmony_ci{ 999bf215546Sopenharmony_ci const Target *target = prog->getTarget(); 1000bf215546Sopenharmony_ci int64_t absB = llabs(b); 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci //a * (2^shl) -> a << shl 1003bf215546Sopenharmony_ci if (b >= 0 && util_is_power_of_two_or_zero64(b)) { 1004bf215546Sopenharmony_ci int shl = util_logbase2_64(b); 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_ci Value *res = c ? bld.getSSA(typeSizeof(ty)) : def; 1007bf215546Sopenharmony_ci bld.mkOp2(OP_SHL, ty, res, a, bld.mkImm(shl)); 1008bf215546Sopenharmony_ci if (c) 1009bf215546Sopenharmony_ci bld.mkOp2(OP_ADD, ty, def, res, c); 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_ci return true; 1012bf215546Sopenharmony_ci } 1013bf215546Sopenharmony_ci 1014bf215546Sopenharmony_ci //a * (2^shl + 1) -> a << shl + a 1015bf215546Sopenharmony_ci //a * -(2^shl + 1) -> -a << shl + a 1016bf215546Sopenharmony_ci //a * (2^shl - 1) -> a << shl - a 1017bf215546Sopenharmony_ci //a * -(2^shl - 1) -> -a << shl - a 1018bf215546Sopenharmony_ci if (typeSizeof(ty) == 4 && 1019bf215546Sopenharmony_ci (util_is_power_of_two_or_zero64(absB - 1) || 1020bf215546Sopenharmony_ci util_is_power_of_two_or_zero64(absB + 1)) && 1021bf215546Sopenharmony_ci target->isOpSupported(OP_SHLADD, TYPE_U32)) { 1022bf215546Sopenharmony_ci bool subA = util_is_power_of_two_or_zero64(absB + 1); 1023bf215546Sopenharmony_ci int shl = subA ? util_logbase2_64(absB + 1) : util_logbase2_64(absB - 1); 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci Value *res = c ? bld.getSSA() : def; 1026bf215546Sopenharmony_ci Instruction *insn = bld.mkOp3(OP_SHLADD, TYPE_U32, res, a, bld.mkImm(shl), a); 1027bf215546Sopenharmony_ci if (b < 0) 1028bf215546Sopenharmony_ci insn->src(0).mod = Modifier(NV50_IR_MOD_NEG); 1029bf215546Sopenharmony_ci if (subA) 1030bf215546Sopenharmony_ci insn->src(2).mod = Modifier(NV50_IR_MOD_NEG); 1031bf215546Sopenharmony_ci 1032bf215546Sopenharmony_ci if (c) 1033bf215546Sopenharmony_ci bld.mkOp2(OP_ADD, TYPE_U32, def, res, c); 1034bf215546Sopenharmony_ci 1035bf215546Sopenharmony_ci return true; 1036bf215546Sopenharmony_ci } 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_ci if (typeSizeof(ty) == 4 && b >= 0 && b <= 0xffff && 1039bf215546Sopenharmony_ci target->isOpSupported(OP_XMAD, TYPE_U32)) { 1040bf215546Sopenharmony_ci Value *tmp = bld.mkOp3v(OP_XMAD, TYPE_U32, bld.getSSA(), 1041bf215546Sopenharmony_ci a, bld.mkImm((uint32_t)b), c ? c : bld.mkImm(0)); 1042bf215546Sopenharmony_ci bld.mkOp3(OP_XMAD, TYPE_U32, def, a, bld.mkImm((uint32_t)b), tmp)->subOp = 1043bf215546Sopenharmony_ci NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_H1(0); 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci return true; 1046bf215546Sopenharmony_ci } 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci return false; 1049bf215546Sopenharmony_ci} 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_cibool 1052bf215546Sopenharmony_ciConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s) 1053bf215546Sopenharmony_ci{ 1054bf215546Sopenharmony_ci const int t = !s; 1055bf215546Sopenharmony_ci const operation op = i->op; 1056bf215546Sopenharmony_ci Instruction *newi = i; 1057bf215546Sopenharmony_ci bool deleted = false; 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci switch (i->op) { 1060bf215546Sopenharmony_ci case OP_SPLIT: { 1061bf215546Sopenharmony_ci bld.setPosition(i, false); 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci uint8_t size = i->getDef(0)->reg.size; 1064bf215546Sopenharmony_ci uint8_t bitsize = size * 8; 1065bf215546Sopenharmony_ci uint32_t mask = (1ULL << bitsize) - 1; 1066bf215546Sopenharmony_ci assert(bitsize <= 32); 1067bf215546Sopenharmony_ci 1068bf215546Sopenharmony_ci uint64_t val = imm0.reg.data.u64; 1069bf215546Sopenharmony_ci for (int8_t d = 0; i->defExists(d); ++d) { 1070bf215546Sopenharmony_ci Value *def = i->getDef(d); 1071bf215546Sopenharmony_ci assert(def->reg.size == size); 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci newi = bld.mkMov(def, bld.mkImm((uint32_t)(val & mask)), TYPE_U32); 1074bf215546Sopenharmony_ci val >>= bitsize; 1075bf215546Sopenharmony_ci } 1076bf215546Sopenharmony_ci delete_Instruction(prog, i); 1077bf215546Sopenharmony_ci deleted = true; 1078bf215546Sopenharmony_ci break; 1079bf215546Sopenharmony_ci } 1080bf215546Sopenharmony_ci case OP_MUL: 1081bf215546Sopenharmony_ci if (i->dType == TYPE_F32 && !i->precise) 1082bf215546Sopenharmony_ci tryCollapseChainedMULs(i, s, imm0); 1083bf215546Sopenharmony_ci 1084bf215546Sopenharmony_ci if (i->subOp == NV50_IR_SUBOP_MUL_HIGH) { 1085bf215546Sopenharmony_ci assert(!isFloatType(i->sType)); 1086bf215546Sopenharmony_ci if (imm0.isInteger(1) && i->dType == TYPE_S32) { 1087bf215546Sopenharmony_ci bld.setPosition(i, false); 1088bf215546Sopenharmony_ci // Need to set to the sign value, which is a compare. 1089bf215546Sopenharmony_ci newi = bld.mkCmp(OP_SET, CC_LT, TYPE_S32, i->getDef(0), 1090bf215546Sopenharmony_ci TYPE_S32, i->getSrc(t), bld.mkImm(0)); 1091bf215546Sopenharmony_ci delete_Instruction(prog, i); 1092bf215546Sopenharmony_ci deleted = true; 1093bf215546Sopenharmony_ci } else if (imm0.isInteger(0) || imm0.isInteger(1)) { 1094bf215546Sopenharmony_ci // The high bits can't be set in this case (either mul by 0 or 1095bf215546Sopenharmony_ci // unsigned by 1) 1096bf215546Sopenharmony_ci i->op = OP_MOV; 1097bf215546Sopenharmony_ci i->subOp = 0; 1098bf215546Sopenharmony_ci i->setSrc(0, new_ImmediateValue(prog, 0u)); 1099bf215546Sopenharmony_ci i->src(0).mod = Modifier(0); 1100bf215546Sopenharmony_ci i->setSrc(1, NULL); 1101bf215546Sopenharmony_ci } else if (!imm0.isNegative() && imm0.isPow2()) { 1102bf215546Sopenharmony_ci // Translate into a shift 1103bf215546Sopenharmony_ci imm0.applyLog2(); 1104bf215546Sopenharmony_ci i->op = OP_SHR; 1105bf215546Sopenharmony_ci i->subOp = 0; 1106bf215546Sopenharmony_ci imm0.reg.data.u32 = 32 - imm0.reg.data.u32; 1107bf215546Sopenharmony_ci i->setSrc(0, i->getSrc(t)); 1108bf215546Sopenharmony_ci i->src(0).mod = i->src(t).mod; 1109bf215546Sopenharmony_ci i->setSrc(1, new_ImmediateValue(prog, imm0.reg.data.u32)); 1110bf215546Sopenharmony_ci i->src(1).mod = 0; 1111bf215546Sopenharmony_ci } 1112bf215546Sopenharmony_ci } else 1113bf215546Sopenharmony_ci if (imm0.isInteger(0)) { 1114bf215546Sopenharmony_ci i->dnz = 0; 1115bf215546Sopenharmony_ci i->op = OP_MOV; 1116bf215546Sopenharmony_ci i->setSrc(0, new_ImmediateValue(prog, 0u)); 1117bf215546Sopenharmony_ci i->src(0).mod = Modifier(0); 1118bf215546Sopenharmony_ci i->postFactor = 0; 1119bf215546Sopenharmony_ci i->setSrc(1, NULL); 1120bf215546Sopenharmony_ci } else 1121bf215546Sopenharmony_ci if (!i->postFactor && (imm0.isInteger(1) || imm0.isInteger(-1))) { 1122bf215546Sopenharmony_ci if (imm0.isNegative()) 1123bf215546Sopenharmony_ci i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); 1124bf215546Sopenharmony_ci i->dnz = 0; 1125bf215546Sopenharmony_ci i->op = i->src(t).mod.getOp(); 1126bf215546Sopenharmony_ci if (s == 0) { 1127bf215546Sopenharmony_ci i->setSrc(0, i->getSrc(1)); 1128bf215546Sopenharmony_ci i->src(0).mod = i->src(1).mod; 1129bf215546Sopenharmony_ci i->src(1).mod = 0; 1130bf215546Sopenharmony_ci } 1131bf215546Sopenharmony_ci if (i->op != OP_CVT) 1132bf215546Sopenharmony_ci i->src(0).mod = 0; 1133bf215546Sopenharmony_ci i->setSrc(1, NULL); 1134bf215546Sopenharmony_ci } else 1135bf215546Sopenharmony_ci if (!i->postFactor && (imm0.isInteger(2) || imm0.isInteger(-2))) { 1136bf215546Sopenharmony_ci if (imm0.isNegative()) 1137bf215546Sopenharmony_ci i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); 1138bf215546Sopenharmony_ci i->op = OP_ADD; 1139bf215546Sopenharmony_ci i->dnz = 0; 1140bf215546Sopenharmony_ci i->setSrc(s, i->getSrc(t)); 1141bf215546Sopenharmony_ci i->src(s).mod = i->src(t).mod; 1142bf215546Sopenharmony_ci } else 1143bf215546Sopenharmony_ci if (!isFloatType(i->dType) && !i->src(t).mod) { 1144bf215546Sopenharmony_ci bld.setPosition(i, false); 1145bf215546Sopenharmony_ci int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32; 1146bf215546Sopenharmony_ci if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, NULL)) { 1147bf215546Sopenharmony_ci delete_Instruction(prog, i); 1148bf215546Sopenharmony_ci deleted = true; 1149bf215546Sopenharmony_ci } 1150bf215546Sopenharmony_ci } else 1151bf215546Sopenharmony_ci if (i->postFactor && i->sType == TYPE_F32) { 1152bf215546Sopenharmony_ci /* Can't emit a postfactor with an immediate, have to fold it in */ 1153bf215546Sopenharmony_ci i->setSrc(s, new_ImmediateValue( 1154bf215546Sopenharmony_ci prog, imm0.reg.data.f32 * exp2f(i->postFactor))); 1155bf215546Sopenharmony_ci i->postFactor = 0; 1156bf215546Sopenharmony_ci } 1157bf215546Sopenharmony_ci break; 1158bf215546Sopenharmony_ci case OP_FMA: 1159bf215546Sopenharmony_ci case OP_MAD: 1160bf215546Sopenharmony_ci if (imm0.isInteger(0)) { 1161bf215546Sopenharmony_ci i->setSrc(0, i->getSrc(2)); 1162bf215546Sopenharmony_ci i->src(0).mod = i->src(2).mod; 1163bf215546Sopenharmony_ci i->setSrc(1, NULL); 1164bf215546Sopenharmony_ci i->setSrc(2, NULL); 1165bf215546Sopenharmony_ci i->dnz = 0; 1166bf215546Sopenharmony_ci i->op = i->src(0).mod.getOp(); 1167bf215546Sopenharmony_ci if (i->op != OP_CVT) 1168bf215546Sopenharmony_ci i->src(0).mod = 0; 1169bf215546Sopenharmony_ci } else 1170bf215546Sopenharmony_ci if (i->subOp != NV50_IR_SUBOP_MUL_HIGH && 1171bf215546Sopenharmony_ci (imm0.isInteger(1) || imm0.isInteger(-1))) { 1172bf215546Sopenharmony_ci if (imm0.isNegative()) 1173bf215546Sopenharmony_ci i->src(t).mod = i->src(t).mod ^ Modifier(NV50_IR_MOD_NEG); 1174bf215546Sopenharmony_ci if (s == 0) { 1175bf215546Sopenharmony_ci i->setSrc(0, i->getSrc(1)); 1176bf215546Sopenharmony_ci i->src(0).mod = i->src(1).mod; 1177bf215546Sopenharmony_ci } 1178bf215546Sopenharmony_ci i->setSrc(1, i->getSrc(2)); 1179bf215546Sopenharmony_ci i->src(1).mod = i->src(2).mod; 1180bf215546Sopenharmony_ci i->setSrc(2, NULL); 1181bf215546Sopenharmony_ci i->dnz = 0; 1182bf215546Sopenharmony_ci i->op = OP_ADD; 1183bf215546Sopenharmony_ci } else 1184bf215546Sopenharmony_ci if (!isFloatType(i->dType) && !i->subOp && !i->src(t).mod && !i->src(2).mod) { 1185bf215546Sopenharmony_ci bld.setPosition(i, false); 1186bf215546Sopenharmony_ci int64_t b = typeSizeof(i->dType) == 8 ? imm0.reg.data.s64 : imm0.reg.data.s32; 1187bf215546Sopenharmony_ci if (createMul(i->dType, i->getDef(0), i->getSrc(t), b, i->getSrc(2))) { 1188bf215546Sopenharmony_ci delete_Instruction(prog, i); 1189bf215546Sopenharmony_ci deleted = true; 1190bf215546Sopenharmony_ci } 1191bf215546Sopenharmony_ci } 1192bf215546Sopenharmony_ci break; 1193bf215546Sopenharmony_ci case OP_SUB: 1194bf215546Sopenharmony_ci if (imm0.isInteger(0) && s == 0 && typeSizeof(i->dType) == 8 && 1195bf215546Sopenharmony_ci !isFloatType(i->dType)) 1196bf215546Sopenharmony_ci break; 1197bf215546Sopenharmony_ci FALLTHROUGH; 1198bf215546Sopenharmony_ci case OP_ADD: 1199bf215546Sopenharmony_ci if (i->usesFlags()) 1200bf215546Sopenharmony_ci break; 1201bf215546Sopenharmony_ci if (imm0.isInteger(0)) { 1202bf215546Sopenharmony_ci if (s == 0) { 1203bf215546Sopenharmony_ci i->setSrc(0, i->getSrc(1)); 1204bf215546Sopenharmony_ci i->src(0).mod = i->src(1).mod; 1205bf215546Sopenharmony_ci if (i->op == OP_SUB) 1206bf215546Sopenharmony_ci i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG); 1207bf215546Sopenharmony_ci } 1208bf215546Sopenharmony_ci i->setSrc(1, NULL); 1209bf215546Sopenharmony_ci i->op = i->src(0).mod.getOp(); 1210bf215546Sopenharmony_ci if (i->op != OP_CVT) 1211bf215546Sopenharmony_ci i->src(0).mod = Modifier(0); 1212bf215546Sopenharmony_ci } 1213bf215546Sopenharmony_ci break; 1214bf215546Sopenharmony_ci 1215bf215546Sopenharmony_ci case OP_DIV: 1216bf215546Sopenharmony_ci if (s != 1 || (i->dType != TYPE_S32 && i->dType != TYPE_U32)) 1217bf215546Sopenharmony_ci break; 1218bf215546Sopenharmony_ci bld.setPosition(i, false); 1219bf215546Sopenharmony_ci if (imm0.reg.data.u32 == 0) { 1220bf215546Sopenharmony_ci break; 1221bf215546Sopenharmony_ci } else 1222bf215546Sopenharmony_ci if (imm0.reg.data.u32 == 1) { 1223bf215546Sopenharmony_ci i->op = OP_MOV; 1224bf215546Sopenharmony_ci i->setSrc(1, NULL); 1225bf215546Sopenharmony_ci } else 1226bf215546Sopenharmony_ci if (i->dType == TYPE_U32 && imm0.isPow2()) { 1227bf215546Sopenharmony_ci i->op = OP_SHR; 1228bf215546Sopenharmony_ci i->setSrc(1, bld.mkImm(util_logbase2(imm0.reg.data.u32))); 1229bf215546Sopenharmony_ci } else 1230bf215546Sopenharmony_ci if (i->dType == TYPE_U32) { 1231bf215546Sopenharmony_ci Instruction *mul; 1232bf215546Sopenharmony_ci Value *tA, *tB; 1233bf215546Sopenharmony_ci const uint32_t d = imm0.reg.data.u32; 1234bf215546Sopenharmony_ci uint32_t m; 1235bf215546Sopenharmony_ci int r, s; 1236bf215546Sopenharmony_ci uint32_t l = util_logbase2(d); 1237bf215546Sopenharmony_ci if (((uint32_t)1 << l) < d) 1238bf215546Sopenharmony_ci ++l; 1239bf215546Sopenharmony_ci m = (((uint64_t)1 << 32) * (((uint64_t)1 << l) - d)) / d + 1; 1240bf215546Sopenharmony_ci r = l ? 1 : 0; 1241bf215546Sopenharmony_ci s = l ? (l - 1) : 0; 1242bf215546Sopenharmony_ci 1243bf215546Sopenharmony_ci tA = bld.getSSA(); 1244bf215546Sopenharmony_ci tB = bld.getSSA(); 1245bf215546Sopenharmony_ci mul = bld.mkOp2(OP_MUL, TYPE_U32, tA, i->getSrc(0), 1246bf215546Sopenharmony_ci bld.loadImm(NULL, m)); 1247bf215546Sopenharmony_ci mul->subOp = NV50_IR_SUBOP_MUL_HIGH; 1248bf215546Sopenharmony_ci bld.mkOp2(OP_SUB, TYPE_U32, tB, i->getSrc(0), tA); 1249bf215546Sopenharmony_ci tA = bld.getSSA(); 1250bf215546Sopenharmony_ci if (r) 1251bf215546Sopenharmony_ci bld.mkOp2(OP_SHR, TYPE_U32, tA, tB, bld.mkImm(r)); 1252bf215546Sopenharmony_ci else 1253bf215546Sopenharmony_ci tA = tB; 1254bf215546Sopenharmony_ci tB = s ? bld.getSSA() : i->getDef(0); 1255bf215546Sopenharmony_ci newi = bld.mkOp2(OP_ADD, TYPE_U32, tB, mul->getDef(0), tA); 1256bf215546Sopenharmony_ci if (s) 1257bf215546Sopenharmony_ci bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), tB, bld.mkImm(s)); 1258bf215546Sopenharmony_ci 1259bf215546Sopenharmony_ci delete_Instruction(prog, i); 1260bf215546Sopenharmony_ci deleted = true; 1261bf215546Sopenharmony_ci } else 1262bf215546Sopenharmony_ci if (imm0.reg.data.s32 == -1) { 1263bf215546Sopenharmony_ci i->op = OP_NEG; 1264bf215546Sopenharmony_ci i->setSrc(1, NULL); 1265bf215546Sopenharmony_ci } else { 1266bf215546Sopenharmony_ci LValue *tA, *tB; 1267bf215546Sopenharmony_ci LValue *tD; 1268bf215546Sopenharmony_ci const int32_t d = imm0.reg.data.s32; 1269bf215546Sopenharmony_ci int32_t m; 1270bf215546Sopenharmony_ci int32_t l = util_logbase2(static_cast<unsigned>(abs(d))); 1271bf215546Sopenharmony_ci if ((1 << l) < abs(d)) 1272bf215546Sopenharmony_ci ++l; 1273bf215546Sopenharmony_ci if (!l) 1274bf215546Sopenharmony_ci l = 1; 1275bf215546Sopenharmony_ci m = ((uint64_t)1 << (32 + l - 1)) / abs(d) + 1 - ((uint64_t)1 << 32); 1276bf215546Sopenharmony_ci 1277bf215546Sopenharmony_ci tA = bld.getSSA(); 1278bf215546Sopenharmony_ci tB = bld.getSSA(); 1279bf215546Sopenharmony_ci bld.mkOp3(OP_MAD, TYPE_S32, tA, i->getSrc(0), bld.loadImm(NULL, m), 1280bf215546Sopenharmony_ci i->getSrc(0))->subOp = NV50_IR_SUBOP_MUL_HIGH; 1281bf215546Sopenharmony_ci if (l > 1) 1282bf215546Sopenharmony_ci bld.mkOp2(OP_SHR, TYPE_S32, tB, tA, bld.mkImm(l - 1)); 1283bf215546Sopenharmony_ci else 1284bf215546Sopenharmony_ci tB = tA; 1285bf215546Sopenharmony_ci tA = bld.getSSA(); 1286bf215546Sopenharmony_ci bld.mkCmp(OP_SET, CC_LT, TYPE_S32, tA, TYPE_S32, i->getSrc(0), bld.mkImm(0)); 1287bf215546Sopenharmony_ci tD = (d < 0) ? bld.getSSA() : i->getDef(0)->asLValue(); 1288bf215546Sopenharmony_ci newi = bld.mkOp2(OP_SUB, TYPE_U32, tD, tB, tA); 1289bf215546Sopenharmony_ci if (d < 0) 1290bf215546Sopenharmony_ci bld.mkOp1(OP_NEG, TYPE_S32, i->getDef(0), tB); 1291bf215546Sopenharmony_ci 1292bf215546Sopenharmony_ci delete_Instruction(prog, i); 1293bf215546Sopenharmony_ci deleted = true; 1294bf215546Sopenharmony_ci } 1295bf215546Sopenharmony_ci break; 1296bf215546Sopenharmony_ci 1297bf215546Sopenharmony_ci case OP_MOD: 1298bf215546Sopenharmony_ci if (s == 1 && imm0.isPow2()) { 1299bf215546Sopenharmony_ci bld.setPosition(i, false); 1300bf215546Sopenharmony_ci if (i->sType == TYPE_U32) { 1301bf215546Sopenharmony_ci i->op = OP_AND; 1302bf215546Sopenharmony_ci i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 - 1)); 1303bf215546Sopenharmony_ci } else if (i->sType == TYPE_S32) { 1304bf215546Sopenharmony_ci // Do it on the absolute value of the input, and then restore the 1305bf215546Sopenharmony_ci // sign. The only odd case is MIN_INT, but that should work out 1306bf215546Sopenharmony_ci // as well, since MIN_INT mod any power of 2 is 0. 1307bf215546Sopenharmony_ci // 1308bf215546Sopenharmony_ci // Technically we don't have to do any of this since MOD is 1309bf215546Sopenharmony_ci // undefined with negative arguments in GLSL, but this seems like 1310bf215546Sopenharmony_ci // the nice thing to do. 1311bf215546Sopenharmony_ci Value *abs = bld.mkOp1v(OP_ABS, TYPE_S32, bld.getSSA(), i->getSrc(0)); 1312bf215546Sopenharmony_ci Value *neg, *v1, *v2; 1313bf215546Sopenharmony_ci bld.mkCmp(OP_SET, CC_LT, TYPE_S32, 1314bf215546Sopenharmony_ci (neg = bld.getSSA(1, prog->getTarget()->nativeFile(FILE_PREDICATE))), 1315bf215546Sopenharmony_ci TYPE_S32, i->getSrc(0), bld.loadImm(NULL, 0)); 1316bf215546Sopenharmony_ci Value *mod = bld.mkOp2v(OP_AND, TYPE_U32, bld.getSSA(), abs, 1317bf215546Sopenharmony_ci bld.loadImm(NULL, imm0.reg.data.u32 - 1)); 1318bf215546Sopenharmony_ci bld.mkOp1(OP_NEG, TYPE_S32, (v1 = bld.getSSA()), mod) 1319bf215546Sopenharmony_ci ->setPredicate(CC_P, neg); 1320bf215546Sopenharmony_ci bld.mkOp1(OP_MOV, TYPE_S32, (v2 = bld.getSSA()), mod) 1321bf215546Sopenharmony_ci ->setPredicate(CC_NOT_P, neg); 1322bf215546Sopenharmony_ci newi = bld.mkOp2(OP_UNION, TYPE_S32, i->getDef(0), v1, v2); 1323bf215546Sopenharmony_ci 1324bf215546Sopenharmony_ci delete_Instruction(prog, i); 1325bf215546Sopenharmony_ci deleted = true; 1326bf215546Sopenharmony_ci } 1327bf215546Sopenharmony_ci } else if (s == 1) { 1328bf215546Sopenharmony_ci // In this case, we still want the optimized lowering that we get 1329bf215546Sopenharmony_ci // from having division by an immediate. 1330bf215546Sopenharmony_ci // 1331bf215546Sopenharmony_ci // a % b == a - (a/b) * b 1332bf215546Sopenharmony_ci bld.setPosition(i, false); 1333bf215546Sopenharmony_ci Value *div = bld.mkOp2v(OP_DIV, i->sType, bld.getSSA(), 1334bf215546Sopenharmony_ci i->getSrc(0), i->getSrc(1)); 1335bf215546Sopenharmony_ci newi = bld.mkOp2(OP_ADD, i->sType, i->getDef(0), i->getSrc(0), 1336bf215546Sopenharmony_ci bld.mkOp2v(OP_MUL, i->sType, bld.getSSA(), div, i->getSrc(1))); 1337bf215546Sopenharmony_ci // TODO: Check that target supports this. In this case, we know that 1338bf215546Sopenharmony_ci // all backends do. 1339bf215546Sopenharmony_ci newi->src(1).mod = Modifier(NV50_IR_MOD_NEG); 1340bf215546Sopenharmony_ci 1341bf215546Sopenharmony_ci delete_Instruction(prog, i); 1342bf215546Sopenharmony_ci deleted = true; 1343bf215546Sopenharmony_ci } 1344bf215546Sopenharmony_ci break; 1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_ci case OP_SET: // TODO: SET_AND,OR,XOR 1347bf215546Sopenharmony_ci { 1348bf215546Sopenharmony_ci /* This optimizes the case where the output of a set is being compared 1349bf215546Sopenharmony_ci * to zero. Since the set can only produce 0/-1 (int) or 0/1 (float), we 1350bf215546Sopenharmony_ci * can be a lot cleverer in our comparison. 1351bf215546Sopenharmony_ci */ 1352bf215546Sopenharmony_ci CmpInstruction *si = findOriginForTestWithZero(i->getSrc(t)); 1353bf215546Sopenharmony_ci CondCode cc, ccZ; 1354bf215546Sopenharmony_ci if (imm0.reg.data.u32 != 0 || !si) 1355bf215546Sopenharmony_ci return false; 1356bf215546Sopenharmony_ci cc = si->setCond; 1357bf215546Sopenharmony_ci ccZ = (CondCode)((unsigned int)i->asCmp()->setCond & ~CC_U); 1358bf215546Sopenharmony_ci // We do everything assuming var (cmp) 0, reverse the condition if 0 is 1359bf215546Sopenharmony_ci // first. 1360bf215546Sopenharmony_ci if (s == 0) 1361bf215546Sopenharmony_ci ccZ = reverseCondCode(ccZ); 1362bf215546Sopenharmony_ci // If there is a negative modifier, we need to undo that, by flipping 1363bf215546Sopenharmony_ci // the comparison to zero. 1364bf215546Sopenharmony_ci if (i->src(t).mod.neg()) 1365bf215546Sopenharmony_ci ccZ = reverseCondCode(ccZ); 1366bf215546Sopenharmony_ci // If this is a signed comparison, we expect the input to be a regular 1367bf215546Sopenharmony_ci // boolean, i.e. 0/-1. However the rest of the logic assumes that true 1368bf215546Sopenharmony_ci // is positive, so just flip the sign. 1369bf215546Sopenharmony_ci if (i->sType == TYPE_S32) { 1370bf215546Sopenharmony_ci assert(!isFloatType(si->dType)); 1371bf215546Sopenharmony_ci ccZ = reverseCondCode(ccZ); 1372bf215546Sopenharmony_ci } 1373bf215546Sopenharmony_ci switch (ccZ) { 1374bf215546Sopenharmony_ci case CC_LT: cc = CC_FL; break; // bool < 0 -- this is never true 1375bf215546Sopenharmony_ci case CC_GE: cc = CC_TR; break; // bool >= 0 -- this is always true 1376bf215546Sopenharmony_ci case CC_EQ: cc = inverseCondCode(cc); break; // bool == 0 -- !bool 1377bf215546Sopenharmony_ci case CC_LE: cc = inverseCondCode(cc); break; // bool <= 0 -- !bool 1378bf215546Sopenharmony_ci case CC_GT: break; // bool > 0 -- bool 1379bf215546Sopenharmony_ci case CC_NE: break; // bool != 0 -- bool 1380bf215546Sopenharmony_ci default: 1381bf215546Sopenharmony_ci return false; 1382bf215546Sopenharmony_ci } 1383bf215546Sopenharmony_ci 1384bf215546Sopenharmony_ci // Update the condition of this SET to be identical to the origin set, 1385bf215546Sopenharmony_ci // but with the updated condition code. The original SET should get 1386bf215546Sopenharmony_ci // DCE'd, ideally. 1387bf215546Sopenharmony_ci i->op = si->op; 1388bf215546Sopenharmony_ci i->asCmp()->setCond = cc; 1389bf215546Sopenharmony_ci i->setSrc(0, si->src(0)); 1390bf215546Sopenharmony_ci i->setSrc(1, si->src(1)); 1391bf215546Sopenharmony_ci if (si->srcExists(2)) 1392bf215546Sopenharmony_ci i->setSrc(2, si->src(2)); 1393bf215546Sopenharmony_ci i->sType = si->sType; 1394bf215546Sopenharmony_ci } 1395bf215546Sopenharmony_ci break; 1396bf215546Sopenharmony_ci 1397bf215546Sopenharmony_ci case OP_AND: 1398bf215546Sopenharmony_ci { 1399bf215546Sopenharmony_ci Instruction *src = i->getSrc(t)->getInsn(); 1400bf215546Sopenharmony_ci ImmediateValue imm1; 1401bf215546Sopenharmony_ci if (imm0.reg.data.u32 == 0) { 1402bf215546Sopenharmony_ci i->op = OP_MOV; 1403bf215546Sopenharmony_ci i->setSrc(0, new_ImmediateValue(prog, 0u)); 1404bf215546Sopenharmony_ci i->src(0).mod = Modifier(0); 1405bf215546Sopenharmony_ci i->setSrc(1, NULL); 1406bf215546Sopenharmony_ci } else if (imm0.reg.data.u32 == ~0U) { 1407bf215546Sopenharmony_ci i->op = i->src(t).mod.getOp(); 1408bf215546Sopenharmony_ci if (t) { 1409bf215546Sopenharmony_ci i->setSrc(0, i->getSrc(t)); 1410bf215546Sopenharmony_ci i->src(0).mod = i->src(t).mod; 1411bf215546Sopenharmony_ci } 1412bf215546Sopenharmony_ci i->setSrc(1, NULL); 1413bf215546Sopenharmony_ci } else if (src->asCmp()) { 1414bf215546Sopenharmony_ci CmpInstruction *cmp = src->asCmp(); 1415bf215546Sopenharmony_ci if (!cmp || cmp->op == OP_SLCT || cmp->getDef(0)->refCount() > 1) 1416bf215546Sopenharmony_ci return false; 1417bf215546Sopenharmony_ci if (!prog->getTarget()->isOpSupported(cmp->op, TYPE_F32)) 1418bf215546Sopenharmony_ci return false; 1419bf215546Sopenharmony_ci if (imm0.reg.data.f32 != 1.0) 1420bf215546Sopenharmony_ci return false; 1421bf215546Sopenharmony_ci if (cmp->dType != TYPE_U32) 1422bf215546Sopenharmony_ci return false; 1423bf215546Sopenharmony_ci 1424bf215546Sopenharmony_ci cmp->dType = TYPE_F32; 1425bf215546Sopenharmony_ci if (i->src(t).mod != Modifier(0)) { 1426bf215546Sopenharmony_ci assert(i->src(t).mod == Modifier(NV50_IR_MOD_NOT)); 1427bf215546Sopenharmony_ci i->src(t).mod = Modifier(0); 1428bf215546Sopenharmony_ci cmp->setCond = inverseCondCode(cmp->setCond); 1429bf215546Sopenharmony_ci } 1430bf215546Sopenharmony_ci i->op = OP_MOV; 1431bf215546Sopenharmony_ci i->setSrc(s, NULL); 1432bf215546Sopenharmony_ci if (t) { 1433bf215546Sopenharmony_ci i->setSrc(0, i->getSrc(t)); 1434bf215546Sopenharmony_ci i->setSrc(t, NULL); 1435bf215546Sopenharmony_ci } 1436bf215546Sopenharmony_ci } else if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32) && 1437bf215546Sopenharmony_ci src->op == OP_SHR && 1438bf215546Sopenharmony_ci src->src(1).getImmediate(imm1) && 1439bf215546Sopenharmony_ci i->src(t).mod == Modifier(0) && 1440bf215546Sopenharmony_ci util_is_power_of_two_or_zero(imm0.reg.data.u32 + 1)) { 1441bf215546Sopenharmony_ci // low byte = offset, high byte = width 1442bf215546Sopenharmony_ci uint32_t ext = (util_last_bit(imm0.reg.data.u32) << 8) | imm1.reg.data.u32; 1443bf215546Sopenharmony_ci i->op = OP_EXTBF; 1444bf215546Sopenharmony_ci i->setSrc(0, src->getSrc(0)); 1445bf215546Sopenharmony_ci i->setSrc(1, new_ImmediateValue(prog, ext)); 1446bf215546Sopenharmony_ci } else if (src->op == OP_SHL && 1447bf215546Sopenharmony_ci src->src(1).getImmediate(imm1) && 1448bf215546Sopenharmony_ci i->src(t).mod == Modifier(0) && 1449bf215546Sopenharmony_ci util_is_power_of_two_or_zero(~imm0.reg.data.u32 + 1) && 1450bf215546Sopenharmony_ci util_last_bit(~imm0.reg.data.u32) <= imm1.reg.data.u32) { 1451bf215546Sopenharmony_ci i->op = OP_MOV; 1452bf215546Sopenharmony_ci i->setSrc(s, NULL); 1453bf215546Sopenharmony_ci if (t) { 1454bf215546Sopenharmony_ci i->setSrc(0, i->getSrc(t)); 1455bf215546Sopenharmony_ci i->setSrc(t, NULL); 1456bf215546Sopenharmony_ci } 1457bf215546Sopenharmony_ci } 1458bf215546Sopenharmony_ci } 1459bf215546Sopenharmony_ci break; 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_ci case OP_SHL: 1462bf215546Sopenharmony_ci { 1463bf215546Sopenharmony_ci if (s != 1 || i->src(0).mod != Modifier(0)) 1464bf215546Sopenharmony_ci break; 1465bf215546Sopenharmony_ci 1466bf215546Sopenharmony_ci if (imm0.reg.data.u32 == 0) { 1467bf215546Sopenharmony_ci i->op = OP_MOV; 1468bf215546Sopenharmony_ci i->setSrc(1, NULL); 1469bf215546Sopenharmony_ci break; 1470bf215546Sopenharmony_ci } 1471bf215546Sopenharmony_ci // try to concatenate shifts 1472bf215546Sopenharmony_ci Instruction *si = i->getSrc(0)->getInsn(); 1473bf215546Sopenharmony_ci if (!si) 1474bf215546Sopenharmony_ci break; 1475bf215546Sopenharmony_ci ImmediateValue imm1; 1476bf215546Sopenharmony_ci switch (si->op) { 1477bf215546Sopenharmony_ci case OP_SHL: 1478bf215546Sopenharmony_ci if (si->src(1).getImmediate(imm1)) { 1479bf215546Sopenharmony_ci bld.setPosition(i, false); 1480bf215546Sopenharmony_ci i->setSrc(0, si->getSrc(0)); 1481bf215546Sopenharmony_ci i->setSrc(1, bld.loadImm(NULL, imm0.reg.data.u32 + imm1.reg.data.u32)); 1482bf215546Sopenharmony_ci } 1483bf215546Sopenharmony_ci break; 1484bf215546Sopenharmony_ci case OP_SHR: 1485bf215546Sopenharmony_ci if (si->src(1).getImmediate(imm1) && imm0.reg.data.u32 == imm1.reg.data.u32) { 1486bf215546Sopenharmony_ci bld.setPosition(i, false); 1487bf215546Sopenharmony_ci i->op = OP_AND; 1488bf215546Sopenharmony_ci i->setSrc(0, si->getSrc(0)); 1489bf215546Sopenharmony_ci i->setSrc(1, bld.loadImm(NULL, ~((1 << imm0.reg.data.u32) - 1))); 1490bf215546Sopenharmony_ci } 1491bf215546Sopenharmony_ci break; 1492bf215546Sopenharmony_ci case OP_MUL: 1493bf215546Sopenharmony_ci int muls; 1494bf215546Sopenharmony_ci if (isFloatType(si->dType)) 1495bf215546Sopenharmony_ci return false; 1496bf215546Sopenharmony_ci if (si->subOp) 1497bf215546Sopenharmony_ci return false; 1498bf215546Sopenharmony_ci if (si->src(1).getImmediate(imm1)) 1499bf215546Sopenharmony_ci muls = 1; 1500bf215546Sopenharmony_ci else if (si->src(0).getImmediate(imm1)) 1501bf215546Sopenharmony_ci muls = 0; 1502bf215546Sopenharmony_ci else 1503bf215546Sopenharmony_ci return false; 1504bf215546Sopenharmony_ci 1505bf215546Sopenharmony_ci bld.setPosition(i, false); 1506bf215546Sopenharmony_ci i->op = OP_MUL; 1507bf215546Sopenharmony_ci i->subOp = 0; 1508bf215546Sopenharmony_ci i->dType = si->dType; 1509bf215546Sopenharmony_ci i->sType = si->sType; 1510bf215546Sopenharmony_ci i->setSrc(0, si->getSrc(!muls)); 1511bf215546Sopenharmony_ci i->setSrc(1, bld.loadImm(NULL, imm1.reg.data.u32 << imm0.reg.data.u32)); 1512bf215546Sopenharmony_ci break; 1513bf215546Sopenharmony_ci case OP_SUB: 1514bf215546Sopenharmony_ci case OP_ADD: 1515bf215546Sopenharmony_ci int adds; 1516bf215546Sopenharmony_ci if (isFloatType(si->dType)) 1517bf215546Sopenharmony_ci return false; 1518bf215546Sopenharmony_ci if (si->op != OP_SUB && si->src(0).getImmediate(imm1)) 1519bf215546Sopenharmony_ci adds = 0; 1520bf215546Sopenharmony_ci else if (si->src(1).getImmediate(imm1)) 1521bf215546Sopenharmony_ci adds = 1; 1522bf215546Sopenharmony_ci else 1523bf215546Sopenharmony_ci return false; 1524bf215546Sopenharmony_ci if (si->src(!adds).mod != Modifier(0)) 1525bf215546Sopenharmony_ci return false; 1526bf215546Sopenharmony_ci // SHL(ADD(x, y), z) = ADD(SHL(x, z), SHL(y, z)) 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_ci // This is more operations, but if one of x, y is an immediate, then 1529bf215546Sopenharmony_ci // we can get a situation where (a) we can use ISCADD, or (b) 1530bf215546Sopenharmony_ci // propagate the add bit into an indirect load. 1531bf215546Sopenharmony_ci bld.setPosition(i, false); 1532bf215546Sopenharmony_ci i->op = si->op; 1533bf215546Sopenharmony_ci i->setSrc(adds, bld.loadImm(NULL, imm1.reg.data.u32 << imm0.reg.data.u32)); 1534bf215546Sopenharmony_ci i->setSrc(!adds, bld.mkOp2v(OP_SHL, i->dType, 1535bf215546Sopenharmony_ci bld.getSSA(i->def(0).getSize(), i->def(0).getFile()), 1536bf215546Sopenharmony_ci si->getSrc(!adds), 1537bf215546Sopenharmony_ci bld.mkImm(imm0.reg.data.u32))); 1538bf215546Sopenharmony_ci break; 1539bf215546Sopenharmony_ci default: 1540bf215546Sopenharmony_ci return false; 1541bf215546Sopenharmony_ci } 1542bf215546Sopenharmony_ci } 1543bf215546Sopenharmony_ci break; 1544bf215546Sopenharmony_ci 1545bf215546Sopenharmony_ci case OP_ABS: 1546bf215546Sopenharmony_ci case OP_NEG: 1547bf215546Sopenharmony_ci case OP_SAT: 1548bf215546Sopenharmony_ci case OP_LG2: 1549bf215546Sopenharmony_ci case OP_RCP: 1550bf215546Sopenharmony_ci case OP_SQRT: 1551bf215546Sopenharmony_ci case OP_RSQ: 1552bf215546Sopenharmony_ci case OP_PRESIN: 1553bf215546Sopenharmony_ci case OP_SIN: 1554bf215546Sopenharmony_ci case OP_COS: 1555bf215546Sopenharmony_ci case OP_PREEX2: 1556bf215546Sopenharmony_ci case OP_EX2: 1557bf215546Sopenharmony_ci unary(i, imm0); 1558bf215546Sopenharmony_ci break; 1559bf215546Sopenharmony_ci case OP_BFIND: { 1560bf215546Sopenharmony_ci int32_t res; 1561bf215546Sopenharmony_ci switch (i->dType) { 1562bf215546Sopenharmony_ci case TYPE_S32: res = util_last_bit_signed(imm0.reg.data.s32) - 1; break; 1563bf215546Sopenharmony_ci case TYPE_U32: res = util_last_bit(imm0.reg.data.u32) - 1; break; 1564bf215546Sopenharmony_ci default: 1565bf215546Sopenharmony_ci return false; 1566bf215546Sopenharmony_ci } 1567bf215546Sopenharmony_ci if (i->subOp == NV50_IR_SUBOP_BFIND_SAMT && res >= 0) 1568bf215546Sopenharmony_ci res = 31 - res; 1569bf215546Sopenharmony_ci bld.setPosition(i, false); /* make sure bld is init'ed */ 1570bf215546Sopenharmony_ci i->setSrc(0, bld.mkImm(res)); 1571bf215546Sopenharmony_ci i->setSrc(1, NULL); 1572bf215546Sopenharmony_ci i->op = OP_MOV; 1573bf215546Sopenharmony_ci i->subOp = 0; 1574bf215546Sopenharmony_ci break; 1575bf215546Sopenharmony_ci } 1576bf215546Sopenharmony_ci case OP_BREV: { 1577bf215546Sopenharmony_ci uint32_t res = util_bitreverse(imm0.reg.data.u32); 1578bf215546Sopenharmony_ci i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res)); 1579bf215546Sopenharmony_ci i->op = OP_MOV; 1580bf215546Sopenharmony_ci break; 1581bf215546Sopenharmony_ci } 1582bf215546Sopenharmony_ci case OP_POPCNT: { 1583bf215546Sopenharmony_ci // Only deal with 1-arg POPCNT here 1584bf215546Sopenharmony_ci if (i->srcExists(1)) 1585bf215546Sopenharmony_ci break; 1586bf215546Sopenharmony_ci uint32_t res = util_bitcount(imm0.reg.data.u32); 1587bf215546Sopenharmony_ci i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res)); 1588bf215546Sopenharmony_ci i->setSrc(1, NULL); 1589bf215546Sopenharmony_ci i->op = OP_MOV; 1590bf215546Sopenharmony_ci break; 1591bf215546Sopenharmony_ci } 1592bf215546Sopenharmony_ci case OP_CVT: { 1593bf215546Sopenharmony_ci Storage res; 1594bf215546Sopenharmony_ci 1595bf215546Sopenharmony_ci // TODO: handle 64-bit values properly 1596bf215546Sopenharmony_ci if (typeSizeof(i->dType) == 8 || typeSizeof(i->sType) == 8) 1597bf215546Sopenharmony_ci return false; 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_ci // TODO: handle single byte/word extractions 1600bf215546Sopenharmony_ci if (i->subOp) 1601bf215546Sopenharmony_ci return false; 1602bf215546Sopenharmony_ci 1603bf215546Sopenharmony_ci bld.setPosition(i, true); /* make sure bld is init'ed */ 1604bf215546Sopenharmony_ci 1605bf215546Sopenharmony_ci#define CASE(type, dst, fmin, fmax, imin, imax, umin, umax) \ 1606bf215546Sopenharmony_ci case type: \ 1607bf215546Sopenharmony_ci switch (i->sType) { \ 1608bf215546Sopenharmony_ci case TYPE_F64: \ 1609bf215546Sopenharmony_ci res.data.dst = util_iround(i->saturate ? \ 1610bf215546Sopenharmony_ci CLAMP(imm0.reg.data.f64, fmin, fmax) : \ 1611bf215546Sopenharmony_ci imm0.reg.data.f64); \ 1612bf215546Sopenharmony_ci break; \ 1613bf215546Sopenharmony_ci case TYPE_F32: \ 1614bf215546Sopenharmony_ci res.data.dst = util_iround(i->saturate ? \ 1615bf215546Sopenharmony_ci CLAMP(imm0.reg.data.f32, fmin, fmax) : \ 1616bf215546Sopenharmony_ci imm0.reg.data.f32); \ 1617bf215546Sopenharmony_ci break; \ 1618bf215546Sopenharmony_ci case TYPE_S32: \ 1619bf215546Sopenharmony_ci res.data.dst = i->saturate ? \ 1620bf215546Sopenharmony_ci CLAMP(imm0.reg.data.s32, imin, imax) : \ 1621bf215546Sopenharmony_ci imm0.reg.data.s32; \ 1622bf215546Sopenharmony_ci break; \ 1623bf215546Sopenharmony_ci case TYPE_U32: \ 1624bf215546Sopenharmony_ci res.data.dst = i->saturate ? \ 1625bf215546Sopenharmony_ci CLAMP(imm0.reg.data.u32, umin, umax) : \ 1626bf215546Sopenharmony_ci imm0.reg.data.u32; \ 1627bf215546Sopenharmony_ci break; \ 1628bf215546Sopenharmony_ci case TYPE_S16: \ 1629bf215546Sopenharmony_ci res.data.dst = i->saturate ? \ 1630bf215546Sopenharmony_ci CLAMP(imm0.reg.data.s16, imin, imax) : \ 1631bf215546Sopenharmony_ci imm0.reg.data.s16; \ 1632bf215546Sopenharmony_ci break; \ 1633bf215546Sopenharmony_ci case TYPE_U16: \ 1634bf215546Sopenharmony_ci res.data.dst = i->saturate ? \ 1635bf215546Sopenharmony_ci CLAMP(imm0.reg.data.u16, umin, umax) : \ 1636bf215546Sopenharmony_ci imm0.reg.data.u16; \ 1637bf215546Sopenharmony_ci break; \ 1638bf215546Sopenharmony_ci default: return false; \ 1639bf215546Sopenharmony_ci } \ 1640bf215546Sopenharmony_ci i->setSrc(0, bld.mkImm(res.data.dst)); \ 1641bf215546Sopenharmony_ci break 1642bf215546Sopenharmony_ci 1643bf215546Sopenharmony_ci switch(i->dType) { 1644bf215546Sopenharmony_ci CASE(TYPE_U16, u16, 0, UINT16_MAX, 0, UINT16_MAX, 0, UINT16_MAX); 1645bf215546Sopenharmony_ci CASE(TYPE_S16, s16, INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, 0, INT16_MAX); 1646bf215546Sopenharmony_ci CASE(TYPE_U32, u32, 0, UINT32_MAX, 0, INT32_MAX, 0, UINT32_MAX); 1647bf215546Sopenharmony_ci CASE(TYPE_S32, s32, INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX, 0, INT32_MAX); 1648bf215546Sopenharmony_ci case TYPE_F32: 1649bf215546Sopenharmony_ci switch (i->sType) { 1650bf215546Sopenharmony_ci case TYPE_F64: 1651bf215546Sopenharmony_ci res.data.f32 = i->saturate ? 1652bf215546Sopenharmony_ci SATURATE(imm0.reg.data.f64) : 1653bf215546Sopenharmony_ci imm0.reg.data.f64; 1654bf215546Sopenharmony_ci break; 1655bf215546Sopenharmony_ci case TYPE_F32: 1656bf215546Sopenharmony_ci res.data.f32 = i->saturate ? 1657bf215546Sopenharmony_ci SATURATE(imm0.reg.data.f32) : 1658bf215546Sopenharmony_ci imm0.reg.data.f32; 1659bf215546Sopenharmony_ci break; 1660bf215546Sopenharmony_ci case TYPE_U16: res.data.f32 = (float) imm0.reg.data.u16; break; 1661bf215546Sopenharmony_ci case TYPE_U32: res.data.f32 = (float) imm0.reg.data.u32; break; 1662bf215546Sopenharmony_ci case TYPE_S16: res.data.f32 = (float) imm0.reg.data.s16; break; 1663bf215546Sopenharmony_ci case TYPE_S32: res.data.f32 = (float) imm0.reg.data.s32; break; 1664bf215546Sopenharmony_ci default: 1665bf215546Sopenharmony_ci return false; 1666bf215546Sopenharmony_ci } 1667bf215546Sopenharmony_ci i->setSrc(0, bld.mkImm(res.data.f32)); 1668bf215546Sopenharmony_ci break; 1669bf215546Sopenharmony_ci case TYPE_F64: 1670bf215546Sopenharmony_ci switch (i->sType) { 1671bf215546Sopenharmony_ci case TYPE_F64: 1672bf215546Sopenharmony_ci res.data.f64 = i->saturate ? 1673bf215546Sopenharmony_ci SATURATE(imm0.reg.data.f64) : 1674bf215546Sopenharmony_ci imm0.reg.data.f64; 1675bf215546Sopenharmony_ci break; 1676bf215546Sopenharmony_ci case TYPE_F32: 1677bf215546Sopenharmony_ci res.data.f64 = i->saturate ? 1678bf215546Sopenharmony_ci SATURATE(imm0.reg.data.f32) : 1679bf215546Sopenharmony_ci imm0.reg.data.f32; 1680bf215546Sopenharmony_ci break; 1681bf215546Sopenharmony_ci case TYPE_U16: res.data.f64 = (double) imm0.reg.data.u16; break; 1682bf215546Sopenharmony_ci case TYPE_U32: res.data.f64 = (double) imm0.reg.data.u32; break; 1683bf215546Sopenharmony_ci case TYPE_S16: res.data.f64 = (double) imm0.reg.data.s16; break; 1684bf215546Sopenharmony_ci case TYPE_S32: res.data.f64 = (double) imm0.reg.data.s32; break; 1685bf215546Sopenharmony_ci default: 1686bf215546Sopenharmony_ci return false; 1687bf215546Sopenharmony_ci } 1688bf215546Sopenharmony_ci i->setSrc(0, bld.mkImm(res.data.f64)); 1689bf215546Sopenharmony_ci break; 1690bf215546Sopenharmony_ci default: 1691bf215546Sopenharmony_ci return false; 1692bf215546Sopenharmony_ci } 1693bf215546Sopenharmony_ci#undef CASE 1694bf215546Sopenharmony_ci 1695bf215546Sopenharmony_ci i->setType(i->dType); /* Remove i->sType, which we don't need anymore */ 1696bf215546Sopenharmony_ci i->op = OP_MOV; 1697bf215546Sopenharmony_ci i->saturate = 0; 1698bf215546Sopenharmony_ci i->src(0).mod = Modifier(0); /* Clear the already applied modifier */ 1699bf215546Sopenharmony_ci break; 1700bf215546Sopenharmony_ci } 1701bf215546Sopenharmony_ci default: 1702bf215546Sopenharmony_ci return false; 1703bf215546Sopenharmony_ci } 1704bf215546Sopenharmony_ci 1705bf215546Sopenharmony_ci // This can get left behind some of the optimizations which simplify 1706bf215546Sopenharmony_ci // saturatable values. 1707bf215546Sopenharmony_ci if (newi->op == OP_MOV && newi->saturate) { 1708bf215546Sopenharmony_ci ImmediateValue tmp; 1709bf215546Sopenharmony_ci newi->saturate = 0; 1710bf215546Sopenharmony_ci newi->op = OP_SAT; 1711bf215546Sopenharmony_ci if (newi->src(0).getImmediate(tmp)) 1712bf215546Sopenharmony_ci unary(newi, tmp); 1713bf215546Sopenharmony_ci } 1714bf215546Sopenharmony_ci 1715bf215546Sopenharmony_ci if (newi->op != op) 1716bf215546Sopenharmony_ci foldCount++; 1717bf215546Sopenharmony_ci return deleted; 1718bf215546Sopenharmony_ci} 1719bf215546Sopenharmony_ci 1720bf215546Sopenharmony_ci// ============================================================================= 1721bf215546Sopenharmony_ci 1722bf215546Sopenharmony_ci// Merge modifier operations (ABS, NEG, NOT) into ValueRefs where allowed. 1723bf215546Sopenharmony_ciclass ModifierFolding : public Pass 1724bf215546Sopenharmony_ci{ 1725bf215546Sopenharmony_ciprivate: 1726bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 1727bf215546Sopenharmony_ci}; 1728bf215546Sopenharmony_ci 1729bf215546Sopenharmony_cibool 1730bf215546Sopenharmony_ciModifierFolding::visit(BasicBlock *bb) 1731bf215546Sopenharmony_ci{ 1732bf215546Sopenharmony_ci const Target *target = prog->getTarget(); 1733bf215546Sopenharmony_ci 1734bf215546Sopenharmony_ci Instruction *i, *next, *mi; 1735bf215546Sopenharmony_ci Modifier mod; 1736bf215546Sopenharmony_ci 1737bf215546Sopenharmony_ci for (i = bb->getEntry(); i; i = next) { 1738bf215546Sopenharmony_ci next = i->next; 1739bf215546Sopenharmony_ci 1740bf215546Sopenharmony_ci if (false && i->op == OP_SUB) { 1741bf215546Sopenharmony_ci // turn "sub" into "add neg" (do we really want this ?) 1742bf215546Sopenharmony_ci i->op = OP_ADD; 1743bf215546Sopenharmony_ci i->src(0).mod = i->src(0).mod ^ Modifier(NV50_IR_MOD_NEG); 1744bf215546Sopenharmony_ci } 1745bf215546Sopenharmony_ci 1746bf215546Sopenharmony_ci for (int s = 0; s < 3 && i->srcExists(s); ++s) { 1747bf215546Sopenharmony_ci mi = i->getSrc(s)->getInsn(); 1748bf215546Sopenharmony_ci if (!mi || 1749bf215546Sopenharmony_ci mi->predSrc >= 0 || mi->getDef(0)->refCount() > 8) 1750bf215546Sopenharmony_ci continue; 1751bf215546Sopenharmony_ci if (i->sType == TYPE_U32 && mi->dType == TYPE_S32) { 1752bf215546Sopenharmony_ci if ((i->op != OP_ADD && 1753bf215546Sopenharmony_ci i->op != OP_MUL) || 1754bf215546Sopenharmony_ci (mi->op != OP_ABS && 1755bf215546Sopenharmony_ci mi->op != OP_NEG)) 1756bf215546Sopenharmony_ci continue; 1757bf215546Sopenharmony_ci } else 1758bf215546Sopenharmony_ci if (i->sType != mi->dType) { 1759bf215546Sopenharmony_ci continue; 1760bf215546Sopenharmony_ci } 1761bf215546Sopenharmony_ci if ((mod = Modifier(mi->op)) == Modifier(0)) 1762bf215546Sopenharmony_ci continue; 1763bf215546Sopenharmony_ci mod *= mi->src(0).mod; 1764bf215546Sopenharmony_ci 1765bf215546Sopenharmony_ci if ((i->op == OP_ABS) || i->src(s).mod.abs()) { 1766bf215546Sopenharmony_ci // abs neg [abs] = abs 1767bf215546Sopenharmony_ci mod = mod & Modifier(~(NV50_IR_MOD_NEG | NV50_IR_MOD_ABS)); 1768bf215546Sopenharmony_ci } else 1769bf215546Sopenharmony_ci if ((i->op == OP_NEG) && mod.neg()) { 1770bf215546Sopenharmony_ci assert(s == 0); 1771bf215546Sopenharmony_ci // neg as both opcode and modifier on same insn is prohibited 1772bf215546Sopenharmony_ci // neg neg abs = abs, neg neg = identity 1773bf215546Sopenharmony_ci mod = mod & Modifier(~NV50_IR_MOD_NEG); 1774bf215546Sopenharmony_ci i->op = mod.getOp(); 1775bf215546Sopenharmony_ci mod = mod & Modifier(~NV50_IR_MOD_ABS); 1776bf215546Sopenharmony_ci if (mod == Modifier(0)) 1777bf215546Sopenharmony_ci i->op = OP_MOV; 1778bf215546Sopenharmony_ci } 1779bf215546Sopenharmony_ci 1780bf215546Sopenharmony_ci if (target->isModSupported(i, s, mod)) { 1781bf215546Sopenharmony_ci i->setSrc(s, mi->getSrc(0)); 1782bf215546Sopenharmony_ci i->src(s).mod *= mod; 1783bf215546Sopenharmony_ci } 1784bf215546Sopenharmony_ci } 1785bf215546Sopenharmony_ci 1786bf215546Sopenharmony_ci if (i->op == OP_SAT) { 1787bf215546Sopenharmony_ci mi = i->getSrc(0)->getInsn(); 1788bf215546Sopenharmony_ci if (mi && 1789bf215546Sopenharmony_ci mi->getDef(0)->refCount() <= 1 && target->isSatSupported(mi)) { 1790bf215546Sopenharmony_ci mi->saturate = 1; 1791bf215546Sopenharmony_ci mi->setDef(0, i->getDef(0)); 1792bf215546Sopenharmony_ci delete_Instruction(prog, i); 1793bf215546Sopenharmony_ci } 1794bf215546Sopenharmony_ci } 1795bf215546Sopenharmony_ci } 1796bf215546Sopenharmony_ci 1797bf215546Sopenharmony_ci return true; 1798bf215546Sopenharmony_ci} 1799bf215546Sopenharmony_ci 1800bf215546Sopenharmony_ci// ============================================================================= 1801bf215546Sopenharmony_ci 1802bf215546Sopenharmony_ci// MUL + ADD -> MAD/FMA 1803bf215546Sopenharmony_ci// MIN/MAX(a, a) -> a, etc. 1804bf215546Sopenharmony_ci// SLCT(a, b, const) -> cc(const) ? a : b 1805bf215546Sopenharmony_ci// RCP(RCP(a)) -> a 1806bf215546Sopenharmony_ci// MUL(MUL(a, b), const) -> MUL_Xconst(a, b) 1807bf215546Sopenharmony_ci// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID) 1808bf215546Sopenharmony_ciclass AlgebraicOpt : public Pass 1809bf215546Sopenharmony_ci{ 1810bf215546Sopenharmony_ciprivate: 1811bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 1812bf215546Sopenharmony_ci 1813bf215546Sopenharmony_ci void handleABS(Instruction *); 1814bf215546Sopenharmony_ci bool handleADD(Instruction *); 1815bf215546Sopenharmony_ci bool tryADDToMADOrSAD(Instruction *, operation toOp); 1816bf215546Sopenharmony_ci void handleMINMAX(Instruction *); 1817bf215546Sopenharmony_ci void handleRCP(Instruction *); 1818bf215546Sopenharmony_ci void handleSLCT(Instruction *); 1819bf215546Sopenharmony_ci void handleLOGOP(Instruction *); 1820bf215546Sopenharmony_ci void handleCVT_NEG(Instruction *); 1821bf215546Sopenharmony_ci void handleCVT_CVT(Instruction *); 1822bf215546Sopenharmony_ci void handleCVT_EXTBF(Instruction *); 1823bf215546Sopenharmony_ci void handleSUCLAMP(Instruction *); 1824bf215546Sopenharmony_ci void handleNEG(Instruction *); 1825bf215546Sopenharmony_ci void handleEXTBF_RDSV(Instruction *); 1826bf215546Sopenharmony_ci 1827bf215546Sopenharmony_ci BuildUtil bld; 1828bf215546Sopenharmony_ci}; 1829bf215546Sopenharmony_ci 1830bf215546Sopenharmony_civoid 1831bf215546Sopenharmony_ciAlgebraicOpt::handleABS(Instruction *abs) 1832bf215546Sopenharmony_ci{ 1833bf215546Sopenharmony_ci Instruction *sub = abs->getSrc(0)->getInsn(); 1834bf215546Sopenharmony_ci DataType ty; 1835bf215546Sopenharmony_ci if (!sub || 1836bf215546Sopenharmony_ci !prog->getTarget()->isOpSupported(OP_SAD, abs->dType)) 1837bf215546Sopenharmony_ci return; 1838bf215546Sopenharmony_ci // hidden conversion ? 1839bf215546Sopenharmony_ci ty = intTypeToSigned(sub->dType); 1840bf215546Sopenharmony_ci if (abs->dType != abs->sType || ty != abs->sType) 1841bf215546Sopenharmony_ci return; 1842bf215546Sopenharmony_ci 1843bf215546Sopenharmony_ci if ((sub->op != OP_ADD && sub->op != OP_SUB) || 1844bf215546Sopenharmony_ci sub->src(0).getFile() != FILE_GPR || sub->src(0).mod || 1845bf215546Sopenharmony_ci sub->src(1).getFile() != FILE_GPR || sub->src(1).mod) 1846bf215546Sopenharmony_ci return; 1847bf215546Sopenharmony_ci 1848bf215546Sopenharmony_ci Value *src0 = sub->getSrc(0); 1849bf215546Sopenharmony_ci Value *src1 = sub->getSrc(1); 1850bf215546Sopenharmony_ci 1851bf215546Sopenharmony_ci if (sub->op == OP_ADD) { 1852bf215546Sopenharmony_ci Instruction *neg = sub->getSrc(1)->getInsn(); 1853bf215546Sopenharmony_ci if (neg && neg->op != OP_NEG) { 1854bf215546Sopenharmony_ci neg = sub->getSrc(0)->getInsn(); 1855bf215546Sopenharmony_ci src0 = sub->getSrc(1); 1856bf215546Sopenharmony_ci } 1857bf215546Sopenharmony_ci if (!neg || neg->op != OP_NEG || 1858bf215546Sopenharmony_ci neg->dType != neg->sType || neg->sType != ty) 1859bf215546Sopenharmony_ci return; 1860bf215546Sopenharmony_ci src1 = neg->getSrc(0); 1861bf215546Sopenharmony_ci } 1862bf215546Sopenharmony_ci 1863bf215546Sopenharmony_ci // found ABS(SUB)) 1864bf215546Sopenharmony_ci abs->moveSources(1, 2); // move sources >=1 up by 2 1865bf215546Sopenharmony_ci abs->op = OP_SAD; 1866bf215546Sopenharmony_ci abs->setType(sub->dType); 1867bf215546Sopenharmony_ci abs->setSrc(0, src0); 1868bf215546Sopenharmony_ci abs->setSrc(1, src1); 1869bf215546Sopenharmony_ci bld.setPosition(abs, false); 1870bf215546Sopenharmony_ci abs->setSrc(2, bld.loadImm(bld.getSSA(typeSizeof(ty)), 0)); 1871bf215546Sopenharmony_ci} 1872bf215546Sopenharmony_ci 1873bf215546Sopenharmony_cibool 1874bf215546Sopenharmony_ciAlgebraicOpt::handleADD(Instruction *add) 1875bf215546Sopenharmony_ci{ 1876bf215546Sopenharmony_ci Value *src0 = add->getSrc(0); 1877bf215546Sopenharmony_ci Value *src1 = add->getSrc(1); 1878bf215546Sopenharmony_ci 1879bf215546Sopenharmony_ci if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) 1880bf215546Sopenharmony_ci return false; 1881bf215546Sopenharmony_ci 1882bf215546Sopenharmony_ci bool changed = false; 1883bf215546Sopenharmony_ci // we can't optimize to MAD if the add is precise 1884bf215546Sopenharmony_ci if (!add->precise && prog->getTarget()->isOpSupported(OP_MAD, add->dType)) 1885bf215546Sopenharmony_ci changed = tryADDToMADOrSAD(add, OP_MAD); 1886bf215546Sopenharmony_ci if (!changed && prog->getTarget()->isOpSupported(OP_SAD, add->dType)) 1887bf215546Sopenharmony_ci changed = tryADDToMADOrSAD(add, OP_SAD); 1888bf215546Sopenharmony_ci return changed; 1889bf215546Sopenharmony_ci} 1890bf215546Sopenharmony_ci 1891bf215546Sopenharmony_ci// ADD(SAD(a,b,0), c) -> SAD(a,b,c) 1892bf215546Sopenharmony_ci// ADD(MUL(a,b), c) -> MAD(a,b,c) 1893bf215546Sopenharmony_cibool 1894bf215546Sopenharmony_ciAlgebraicOpt::tryADDToMADOrSAD(Instruction *add, operation toOp) 1895bf215546Sopenharmony_ci{ 1896bf215546Sopenharmony_ci Value *src0 = add->getSrc(0); 1897bf215546Sopenharmony_ci Value *src1 = add->getSrc(1); 1898bf215546Sopenharmony_ci Value *src; 1899bf215546Sopenharmony_ci int s; 1900bf215546Sopenharmony_ci const operation srcOp = toOp == OP_SAD ? OP_SAD : OP_MUL; 1901bf215546Sopenharmony_ci const Modifier modBad = Modifier(~((toOp == OP_MAD) ? NV50_IR_MOD_NEG : 0)); 1902bf215546Sopenharmony_ci Modifier mod[4]; 1903bf215546Sopenharmony_ci 1904bf215546Sopenharmony_ci if (src0->refCount() == 1 && 1905bf215546Sopenharmony_ci src0->getUniqueInsn() && src0->getUniqueInsn()->op == srcOp) 1906bf215546Sopenharmony_ci s = 0; 1907bf215546Sopenharmony_ci else 1908bf215546Sopenharmony_ci if (src1->refCount() == 1 && 1909bf215546Sopenharmony_ci src1->getUniqueInsn() && src1->getUniqueInsn()->op == srcOp) 1910bf215546Sopenharmony_ci s = 1; 1911bf215546Sopenharmony_ci else 1912bf215546Sopenharmony_ci return false; 1913bf215546Sopenharmony_ci 1914bf215546Sopenharmony_ci src = add->getSrc(s); 1915bf215546Sopenharmony_ci 1916bf215546Sopenharmony_ci if (src->getUniqueInsn() && src->getUniqueInsn()->bb != add->bb) 1917bf215546Sopenharmony_ci return false; 1918bf215546Sopenharmony_ci 1919bf215546Sopenharmony_ci if (src->getInsn()->saturate || src->getInsn()->postFactor || 1920bf215546Sopenharmony_ci src->getInsn()->dnz || src->getInsn()->precise) 1921bf215546Sopenharmony_ci return false; 1922bf215546Sopenharmony_ci 1923bf215546Sopenharmony_ci if (toOp == OP_SAD) { 1924bf215546Sopenharmony_ci ImmediateValue imm; 1925bf215546Sopenharmony_ci if (!src->getInsn()->src(2).getImmediate(imm)) 1926bf215546Sopenharmony_ci return false; 1927bf215546Sopenharmony_ci if (!imm.isInteger(0)) 1928bf215546Sopenharmony_ci return false; 1929bf215546Sopenharmony_ci } 1930bf215546Sopenharmony_ci 1931bf215546Sopenharmony_ci if (typeSizeof(add->dType) != typeSizeof(src->getInsn()->dType) || 1932bf215546Sopenharmony_ci isFloatType(add->dType) != isFloatType(src->getInsn()->dType)) 1933bf215546Sopenharmony_ci return false; 1934bf215546Sopenharmony_ci 1935bf215546Sopenharmony_ci mod[0] = add->src(0).mod; 1936bf215546Sopenharmony_ci mod[1] = add->src(1).mod; 1937bf215546Sopenharmony_ci mod[2] = src->getUniqueInsn()->src(0).mod; 1938bf215546Sopenharmony_ci mod[3] = src->getUniqueInsn()->src(1).mod; 1939bf215546Sopenharmony_ci 1940bf215546Sopenharmony_ci if (((mod[0] | mod[1]) | (mod[2] | mod[3])) & modBad) 1941bf215546Sopenharmony_ci return false; 1942bf215546Sopenharmony_ci 1943bf215546Sopenharmony_ci add->op = toOp; 1944bf215546Sopenharmony_ci add->subOp = src->getInsn()->subOp; // potentially mul-high 1945bf215546Sopenharmony_ci add->dnz = src->getInsn()->dnz; 1946bf215546Sopenharmony_ci add->dType = src->getInsn()->dType; // sign matters for imad hi 1947bf215546Sopenharmony_ci add->sType = src->getInsn()->sType; 1948bf215546Sopenharmony_ci 1949bf215546Sopenharmony_ci add->setSrc(2, add->src(s ? 0 : 1)); 1950bf215546Sopenharmony_ci 1951bf215546Sopenharmony_ci add->setSrc(0, src->getInsn()->getSrc(0)); 1952bf215546Sopenharmony_ci add->src(0).mod = mod[2] ^ mod[s]; 1953bf215546Sopenharmony_ci add->setSrc(1, src->getInsn()->getSrc(1)); 1954bf215546Sopenharmony_ci add->src(1).mod = mod[3]; 1955bf215546Sopenharmony_ci 1956bf215546Sopenharmony_ci return true; 1957bf215546Sopenharmony_ci} 1958bf215546Sopenharmony_ci 1959bf215546Sopenharmony_civoid 1960bf215546Sopenharmony_ciAlgebraicOpt::handleMINMAX(Instruction *minmax) 1961bf215546Sopenharmony_ci{ 1962bf215546Sopenharmony_ci Value *src0 = minmax->getSrc(0); 1963bf215546Sopenharmony_ci Value *src1 = minmax->getSrc(1); 1964bf215546Sopenharmony_ci 1965bf215546Sopenharmony_ci if (src0 != src1 || src0->reg.file != FILE_GPR) 1966bf215546Sopenharmony_ci return; 1967bf215546Sopenharmony_ci if (minmax->src(0).mod == minmax->src(1).mod) { 1968bf215546Sopenharmony_ci if (minmax->def(0).mayReplace(minmax->src(0))) { 1969bf215546Sopenharmony_ci minmax->def(0).replace(minmax->src(0), false); 1970bf215546Sopenharmony_ci delete_Instruction(prog, minmax); 1971bf215546Sopenharmony_ci } else { 1972bf215546Sopenharmony_ci minmax->op = OP_CVT; 1973bf215546Sopenharmony_ci minmax->setSrc(1, NULL); 1974bf215546Sopenharmony_ci } 1975bf215546Sopenharmony_ci } else { 1976bf215546Sopenharmony_ci // TODO: 1977bf215546Sopenharmony_ci // min(x, -x) = -abs(x) 1978bf215546Sopenharmony_ci // min(x, -abs(x)) = -abs(x) 1979bf215546Sopenharmony_ci // min(x, abs(x)) = x 1980bf215546Sopenharmony_ci // max(x, -abs(x)) = x 1981bf215546Sopenharmony_ci // max(x, abs(x)) = abs(x) 1982bf215546Sopenharmony_ci // max(x, -x) = abs(x) 1983bf215546Sopenharmony_ci } 1984bf215546Sopenharmony_ci} 1985bf215546Sopenharmony_ci 1986bf215546Sopenharmony_ci// rcp(rcp(a)) = a 1987bf215546Sopenharmony_ci// rcp(sqrt(a)) = rsq(a) 1988bf215546Sopenharmony_civoid 1989bf215546Sopenharmony_ciAlgebraicOpt::handleRCP(Instruction *rcp) 1990bf215546Sopenharmony_ci{ 1991bf215546Sopenharmony_ci Instruction *si = rcp->getSrc(0)->getUniqueInsn(); 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci if (!si) 1994bf215546Sopenharmony_ci return; 1995bf215546Sopenharmony_ci 1996bf215546Sopenharmony_ci if (si->op == OP_RCP) { 1997bf215546Sopenharmony_ci Modifier mod = rcp->src(0).mod * si->src(0).mod; 1998bf215546Sopenharmony_ci rcp->op = mod.getOp(); 1999bf215546Sopenharmony_ci rcp->setSrc(0, si->getSrc(0)); 2000bf215546Sopenharmony_ci } else if (si->op == OP_SQRT) { 2001bf215546Sopenharmony_ci rcp->op = OP_RSQ; 2002bf215546Sopenharmony_ci rcp->setSrc(0, si->getSrc(0)); 2003bf215546Sopenharmony_ci rcp->src(0).mod = rcp->src(0).mod * si->src(0).mod; 2004bf215546Sopenharmony_ci } 2005bf215546Sopenharmony_ci} 2006bf215546Sopenharmony_ci 2007bf215546Sopenharmony_civoid 2008bf215546Sopenharmony_ciAlgebraicOpt::handleSLCT(Instruction *slct) 2009bf215546Sopenharmony_ci{ 2010bf215546Sopenharmony_ci if (slct->getSrc(2)->reg.file == FILE_IMMEDIATE) { 2011bf215546Sopenharmony_ci if (slct->getSrc(2)->asImm()->compare(slct->asCmp()->setCond, 0.0f)) 2012bf215546Sopenharmony_ci slct->setSrc(0, slct->getSrc(1)); 2013bf215546Sopenharmony_ci } else 2014bf215546Sopenharmony_ci if (slct->getSrc(0) != slct->getSrc(1)) { 2015bf215546Sopenharmony_ci return; 2016bf215546Sopenharmony_ci } 2017bf215546Sopenharmony_ci slct->op = OP_MOV; 2018bf215546Sopenharmony_ci slct->setSrc(1, NULL); 2019bf215546Sopenharmony_ci slct->setSrc(2, NULL); 2020bf215546Sopenharmony_ci} 2021bf215546Sopenharmony_ci 2022bf215546Sopenharmony_civoid 2023bf215546Sopenharmony_ciAlgebraicOpt::handleLOGOP(Instruction *logop) 2024bf215546Sopenharmony_ci{ 2025bf215546Sopenharmony_ci Value *src0 = logop->getSrc(0); 2026bf215546Sopenharmony_ci Value *src1 = logop->getSrc(1); 2027bf215546Sopenharmony_ci 2028bf215546Sopenharmony_ci if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) 2029bf215546Sopenharmony_ci return; 2030bf215546Sopenharmony_ci 2031bf215546Sopenharmony_ci if (src0 == src1) { 2032bf215546Sopenharmony_ci if ((logop->op == OP_AND || logop->op == OP_OR) && 2033bf215546Sopenharmony_ci logop->def(0).mayReplace(logop->src(0))) { 2034bf215546Sopenharmony_ci logop->def(0).replace(logop->src(0), false); 2035bf215546Sopenharmony_ci delete_Instruction(prog, logop); 2036bf215546Sopenharmony_ci } 2037bf215546Sopenharmony_ci } else { 2038bf215546Sopenharmony_ci // try AND(SET, SET) -> SET_AND(SET) 2039bf215546Sopenharmony_ci Instruction *set0 = src0->getInsn(); 2040bf215546Sopenharmony_ci Instruction *set1 = src1->getInsn(); 2041bf215546Sopenharmony_ci 2042bf215546Sopenharmony_ci if (!set0 || set0->fixed || !set1 || set1->fixed) 2043bf215546Sopenharmony_ci return; 2044bf215546Sopenharmony_ci if (set1->op != OP_SET) { 2045bf215546Sopenharmony_ci Instruction *xchg = set0; 2046bf215546Sopenharmony_ci set0 = set1; 2047bf215546Sopenharmony_ci set1 = xchg; 2048bf215546Sopenharmony_ci if (set1->op != OP_SET) 2049bf215546Sopenharmony_ci return; 2050bf215546Sopenharmony_ci } 2051bf215546Sopenharmony_ci operation redOp = (logop->op == OP_AND ? OP_SET_AND : 2052bf215546Sopenharmony_ci logop->op == OP_XOR ? OP_SET_XOR : OP_SET_OR); 2053bf215546Sopenharmony_ci if (!prog->getTarget()->isOpSupported(redOp, set1->sType)) 2054bf215546Sopenharmony_ci return; 2055bf215546Sopenharmony_ci if (set0->op != OP_SET && 2056bf215546Sopenharmony_ci set0->op != OP_SET_AND && 2057bf215546Sopenharmony_ci set0->op != OP_SET_OR && 2058bf215546Sopenharmony_ci set0->op != OP_SET_XOR) 2059bf215546Sopenharmony_ci return; 2060bf215546Sopenharmony_ci if (set0->getDef(0)->refCount() > 1 && 2061bf215546Sopenharmony_ci set1->getDef(0)->refCount() > 1) 2062bf215546Sopenharmony_ci return; 2063bf215546Sopenharmony_ci if (set0->getPredicate() || set1->getPredicate()) 2064bf215546Sopenharmony_ci return; 2065bf215546Sopenharmony_ci // check that they don't source each other 2066bf215546Sopenharmony_ci for (int s = 0; s < 2; ++s) 2067bf215546Sopenharmony_ci if (set0->getSrc(s) == set1->getDef(0) || 2068bf215546Sopenharmony_ci set1->getSrc(s) == set0->getDef(0)) 2069bf215546Sopenharmony_ci return; 2070bf215546Sopenharmony_ci 2071bf215546Sopenharmony_ci set0 = cloneForward(func, set0); 2072bf215546Sopenharmony_ci set1 = cloneShallow(func, set1); 2073bf215546Sopenharmony_ci logop->bb->insertAfter(logop, set1); 2074bf215546Sopenharmony_ci logop->bb->insertAfter(logop, set0); 2075bf215546Sopenharmony_ci 2076bf215546Sopenharmony_ci set0->dType = TYPE_U8; 2077bf215546Sopenharmony_ci set0->getDef(0)->reg.file = FILE_PREDICATE; 2078bf215546Sopenharmony_ci set0->getDef(0)->reg.size = 1; 2079bf215546Sopenharmony_ci set1->setSrc(2, set0->getDef(0)); 2080bf215546Sopenharmony_ci set1->op = redOp; 2081bf215546Sopenharmony_ci set1->setDef(0, logop->getDef(0)); 2082bf215546Sopenharmony_ci delete_Instruction(prog, logop); 2083bf215546Sopenharmony_ci } 2084bf215546Sopenharmony_ci} 2085bf215546Sopenharmony_ci 2086bf215546Sopenharmony_ci// F2I(NEG(SET with result 1.0f/0.0f)) -> SET with result -1/0 2087bf215546Sopenharmony_ci// nv50: 2088bf215546Sopenharmony_ci// F2I(NEG(I2F(ABS(SET)))) 2089bf215546Sopenharmony_civoid 2090bf215546Sopenharmony_ciAlgebraicOpt::handleCVT_NEG(Instruction *cvt) 2091bf215546Sopenharmony_ci{ 2092bf215546Sopenharmony_ci Instruction *insn = cvt->getSrc(0)->getInsn(); 2093bf215546Sopenharmony_ci if (cvt->sType != TYPE_F32 || 2094bf215546Sopenharmony_ci cvt->dType != TYPE_S32 || cvt->src(0).mod != Modifier(0)) 2095bf215546Sopenharmony_ci return; 2096bf215546Sopenharmony_ci if (!insn || insn->op != OP_NEG || insn->dType != TYPE_F32) 2097bf215546Sopenharmony_ci return; 2098bf215546Sopenharmony_ci if (insn->src(0).mod != Modifier(0)) 2099bf215546Sopenharmony_ci return; 2100bf215546Sopenharmony_ci insn = insn->getSrc(0)->getInsn(); 2101bf215546Sopenharmony_ci 2102bf215546Sopenharmony_ci // check for nv50 SET(-1,0) -> SET(1.0f/0.0f) chain and nvc0's f32 SET 2103bf215546Sopenharmony_ci if (insn && insn->op == OP_CVT && 2104bf215546Sopenharmony_ci insn->dType == TYPE_F32 && 2105bf215546Sopenharmony_ci insn->sType == TYPE_S32) { 2106bf215546Sopenharmony_ci insn = insn->getSrc(0)->getInsn(); 2107bf215546Sopenharmony_ci if (!insn || insn->op != OP_ABS || insn->sType != TYPE_S32 || 2108bf215546Sopenharmony_ci insn->src(0).mod) 2109bf215546Sopenharmony_ci return; 2110bf215546Sopenharmony_ci insn = insn->getSrc(0)->getInsn(); 2111bf215546Sopenharmony_ci if (!insn || insn->op != OP_SET || insn->dType != TYPE_U32) 2112bf215546Sopenharmony_ci return; 2113bf215546Sopenharmony_ci } else 2114bf215546Sopenharmony_ci if (!insn || insn->op != OP_SET || insn->dType != TYPE_F32) { 2115bf215546Sopenharmony_ci return; 2116bf215546Sopenharmony_ci } 2117bf215546Sopenharmony_ci 2118bf215546Sopenharmony_ci Instruction *bset = cloneShallow(func, insn); 2119bf215546Sopenharmony_ci bset->dType = TYPE_U32; 2120bf215546Sopenharmony_ci bset->setDef(0, cvt->getDef(0)); 2121bf215546Sopenharmony_ci cvt->bb->insertAfter(cvt, bset); 2122bf215546Sopenharmony_ci delete_Instruction(prog, cvt); 2123bf215546Sopenharmony_ci} 2124bf215546Sopenharmony_ci 2125bf215546Sopenharmony_ci// F2I(TRUNC()) and so on can be expressed as a single CVT. If the earlier CVT 2126bf215546Sopenharmony_ci// does a type conversion, this becomes trickier as there might be range 2127bf215546Sopenharmony_ci// changes/etc. We could handle those in theory as long as the range was being 2128bf215546Sopenharmony_ci// reduced or kept the same. 2129bf215546Sopenharmony_civoid 2130bf215546Sopenharmony_ciAlgebraicOpt::handleCVT_CVT(Instruction *cvt) 2131bf215546Sopenharmony_ci{ 2132bf215546Sopenharmony_ci Instruction *insn = cvt->getSrc(0)->getInsn(); 2133bf215546Sopenharmony_ci 2134bf215546Sopenharmony_ci if (!insn || 2135bf215546Sopenharmony_ci insn->saturate || 2136bf215546Sopenharmony_ci insn->subOp || 2137bf215546Sopenharmony_ci insn->dType != insn->sType || 2138bf215546Sopenharmony_ci insn->dType != cvt->sType) 2139bf215546Sopenharmony_ci return; 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_ci RoundMode rnd = insn->rnd; 2142bf215546Sopenharmony_ci switch (insn->op) { 2143bf215546Sopenharmony_ci case OP_CEIL: 2144bf215546Sopenharmony_ci rnd = ROUND_PI; 2145bf215546Sopenharmony_ci break; 2146bf215546Sopenharmony_ci case OP_FLOOR: 2147bf215546Sopenharmony_ci rnd = ROUND_MI; 2148bf215546Sopenharmony_ci break; 2149bf215546Sopenharmony_ci case OP_TRUNC: 2150bf215546Sopenharmony_ci rnd = ROUND_ZI; 2151bf215546Sopenharmony_ci break; 2152bf215546Sopenharmony_ci case OP_CVT: 2153bf215546Sopenharmony_ci break; 2154bf215546Sopenharmony_ci default: 2155bf215546Sopenharmony_ci return; 2156bf215546Sopenharmony_ci } 2157bf215546Sopenharmony_ci 2158bf215546Sopenharmony_ci if (!isFloatType(cvt->dType) || !isFloatType(insn->sType)) 2159bf215546Sopenharmony_ci rnd = (RoundMode)(rnd & 3); 2160bf215546Sopenharmony_ci 2161bf215546Sopenharmony_ci cvt->rnd = rnd; 2162bf215546Sopenharmony_ci cvt->setSrc(0, insn->getSrc(0)); 2163bf215546Sopenharmony_ci cvt->src(0).mod *= insn->src(0).mod; 2164bf215546Sopenharmony_ci cvt->sType = insn->sType; 2165bf215546Sopenharmony_ci} 2166bf215546Sopenharmony_ci 2167bf215546Sopenharmony_ci// Some shaders extract packed bytes out of words and convert them to 2168bf215546Sopenharmony_ci// e.g. float. The Fermi+ CVT instruction can extract those directly, as can 2169bf215546Sopenharmony_ci// nv50 for word sizes. 2170bf215546Sopenharmony_ci// 2171bf215546Sopenharmony_ci// CVT(EXTBF(x, byte/word)) 2172bf215546Sopenharmony_ci// CVT(AND(bytemask, x)) 2173bf215546Sopenharmony_ci// CVT(AND(bytemask, SHR(x, 8/16/24))) 2174bf215546Sopenharmony_ci// CVT(SHR(x, 16/24)) 2175bf215546Sopenharmony_civoid 2176bf215546Sopenharmony_ciAlgebraicOpt::handleCVT_EXTBF(Instruction *cvt) 2177bf215546Sopenharmony_ci{ 2178bf215546Sopenharmony_ci Instruction *insn = cvt->getSrc(0)->getInsn(); 2179bf215546Sopenharmony_ci ImmediateValue imm; 2180bf215546Sopenharmony_ci Value *arg = NULL; 2181bf215546Sopenharmony_ci unsigned width, offset = 0; 2182bf215546Sopenharmony_ci if ((cvt->sType != TYPE_U32 && cvt->sType != TYPE_S32) || !insn) 2183bf215546Sopenharmony_ci return; 2184bf215546Sopenharmony_ci if (insn->op == OP_EXTBF && insn->src(1).getImmediate(imm)) { 2185bf215546Sopenharmony_ci width = (imm.reg.data.u32 >> 8) & 0xff; 2186bf215546Sopenharmony_ci offset = imm.reg.data.u32 & 0xff; 2187bf215546Sopenharmony_ci arg = insn->getSrc(0); 2188bf215546Sopenharmony_ci 2189bf215546Sopenharmony_ci if (width != 8 && width != 16) 2190bf215546Sopenharmony_ci return; 2191bf215546Sopenharmony_ci if (width == 8 && offset & 0x7) 2192bf215546Sopenharmony_ci return; 2193bf215546Sopenharmony_ci if (width == 16 && offset & 0xf) 2194bf215546Sopenharmony_ci return; 2195bf215546Sopenharmony_ci } else if (insn->op == OP_AND) { 2196bf215546Sopenharmony_ci int s; 2197bf215546Sopenharmony_ci if (insn->src(0).getImmediate(imm)) 2198bf215546Sopenharmony_ci s = 0; 2199bf215546Sopenharmony_ci else if (insn->src(1).getImmediate(imm)) 2200bf215546Sopenharmony_ci s = 1; 2201bf215546Sopenharmony_ci else 2202bf215546Sopenharmony_ci return; 2203bf215546Sopenharmony_ci 2204bf215546Sopenharmony_ci if (imm.reg.data.u32 == 0xff) 2205bf215546Sopenharmony_ci width = 8; 2206bf215546Sopenharmony_ci else if (imm.reg.data.u32 == 0xffff) 2207bf215546Sopenharmony_ci width = 16; 2208bf215546Sopenharmony_ci else 2209bf215546Sopenharmony_ci return; 2210bf215546Sopenharmony_ci 2211bf215546Sopenharmony_ci arg = insn->getSrc(!s); 2212bf215546Sopenharmony_ci Instruction *shift = arg->getInsn(); 2213bf215546Sopenharmony_ci 2214bf215546Sopenharmony_ci if (shift && shift->op == OP_SHR && 2215bf215546Sopenharmony_ci shift->sType == cvt->sType && 2216bf215546Sopenharmony_ci shift->src(1).getImmediate(imm) && 2217bf215546Sopenharmony_ci ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) || 2218bf215546Sopenharmony_ci (width == 16 && (imm.reg.data.u32 & 0xf) == 0))) { 2219bf215546Sopenharmony_ci arg = shift->getSrc(0); 2220bf215546Sopenharmony_ci offset = imm.reg.data.u32; 2221bf215546Sopenharmony_ci } 2222bf215546Sopenharmony_ci // We just AND'd the high bits away, which means this is effectively an 2223bf215546Sopenharmony_ci // unsigned value. 2224bf215546Sopenharmony_ci cvt->sType = TYPE_U32; 2225bf215546Sopenharmony_ci } else if (insn->op == OP_SHR && 2226bf215546Sopenharmony_ci insn->sType == cvt->sType && 2227bf215546Sopenharmony_ci insn->src(1).getImmediate(imm)) { 2228bf215546Sopenharmony_ci arg = insn->getSrc(0); 2229bf215546Sopenharmony_ci if (imm.reg.data.u32 == 24) { 2230bf215546Sopenharmony_ci width = 8; 2231bf215546Sopenharmony_ci offset = 24; 2232bf215546Sopenharmony_ci } else if (imm.reg.data.u32 == 16) { 2233bf215546Sopenharmony_ci width = 16; 2234bf215546Sopenharmony_ci offset = 16; 2235bf215546Sopenharmony_ci } else { 2236bf215546Sopenharmony_ci return; 2237bf215546Sopenharmony_ci } 2238bf215546Sopenharmony_ci } 2239bf215546Sopenharmony_ci 2240bf215546Sopenharmony_ci if (!arg) 2241bf215546Sopenharmony_ci return; 2242bf215546Sopenharmony_ci 2243bf215546Sopenharmony_ci // Irrespective of what came earlier, we can undo a shift on the argument 2244bf215546Sopenharmony_ci // by adjusting the offset. 2245bf215546Sopenharmony_ci Instruction *shift = arg->getInsn(); 2246bf215546Sopenharmony_ci if (shift && shift->op == OP_SHL && 2247bf215546Sopenharmony_ci shift->src(1).getImmediate(imm) && 2248bf215546Sopenharmony_ci ((width == 8 && (imm.reg.data.u32 & 0x7) == 0) || 2249bf215546Sopenharmony_ci (width == 16 && (imm.reg.data.u32 & 0xf) == 0)) && 2250bf215546Sopenharmony_ci imm.reg.data.u32 <= offset) { 2251bf215546Sopenharmony_ci arg = shift->getSrc(0); 2252bf215546Sopenharmony_ci offset -= imm.reg.data.u32; 2253bf215546Sopenharmony_ci } 2254bf215546Sopenharmony_ci 2255bf215546Sopenharmony_ci // The unpackSnorm lowering still leaves a few shifts behind, but it's too 2256bf215546Sopenharmony_ci // annoying to detect them. 2257bf215546Sopenharmony_ci 2258bf215546Sopenharmony_ci if (width == 8) { 2259bf215546Sopenharmony_ci cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U8 : TYPE_S8; 2260bf215546Sopenharmony_ci } else { 2261bf215546Sopenharmony_ci assert(width == 16); 2262bf215546Sopenharmony_ci cvt->sType = cvt->sType == TYPE_U32 ? TYPE_U16 : TYPE_S16; 2263bf215546Sopenharmony_ci } 2264bf215546Sopenharmony_ci cvt->setSrc(0, arg); 2265bf215546Sopenharmony_ci cvt->subOp = offset >> 3; 2266bf215546Sopenharmony_ci} 2267bf215546Sopenharmony_ci 2268bf215546Sopenharmony_ci// SUCLAMP dst, (ADD b imm), k, 0 -> SUCLAMP dst, b, k, imm (if imm fits s6) 2269bf215546Sopenharmony_civoid 2270bf215546Sopenharmony_ciAlgebraicOpt::handleSUCLAMP(Instruction *insn) 2271bf215546Sopenharmony_ci{ 2272bf215546Sopenharmony_ci ImmediateValue imm; 2273bf215546Sopenharmony_ci int32_t val = insn->getSrc(2)->asImm()->reg.data.s32; 2274bf215546Sopenharmony_ci int s; 2275bf215546Sopenharmony_ci Instruction *add; 2276bf215546Sopenharmony_ci 2277bf215546Sopenharmony_ci assert(insn->srcExists(0) && insn->src(0).getFile() == FILE_GPR); 2278bf215546Sopenharmony_ci 2279bf215546Sopenharmony_ci // look for ADD (TODO: only count references by non-SUCLAMP) 2280bf215546Sopenharmony_ci if (insn->getSrc(0)->refCount() > 1) 2281bf215546Sopenharmony_ci return; 2282bf215546Sopenharmony_ci add = insn->getSrc(0)->getInsn(); 2283bf215546Sopenharmony_ci if (!add || add->op != OP_ADD || 2284bf215546Sopenharmony_ci (add->dType != TYPE_U32 && 2285bf215546Sopenharmony_ci add->dType != TYPE_S32)) 2286bf215546Sopenharmony_ci return; 2287bf215546Sopenharmony_ci 2288bf215546Sopenharmony_ci // look for immediate 2289bf215546Sopenharmony_ci for (s = 0; s < 2; ++s) 2290bf215546Sopenharmony_ci if (add->src(s).getImmediate(imm)) 2291bf215546Sopenharmony_ci break; 2292bf215546Sopenharmony_ci if (s >= 2) 2293bf215546Sopenharmony_ci return; 2294bf215546Sopenharmony_ci s = s ? 0 : 1; 2295bf215546Sopenharmony_ci // determine if immediate fits 2296bf215546Sopenharmony_ci val += imm.reg.data.s32; 2297bf215546Sopenharmony_ci if (val > 31 || val < -32) 2298bf215546Sopenharmony_ci return; 2299bf215546Sopenharmony_ci // determine if other addend fits 2300bf215546Sopenharmony_ci if (add->src(s).getFile() != FILE_GPR || add->src(s).mod != Modifier(0)) 2301bf215546Sopenharmony_ci return; 2302bf215546Sopenharmony_ci 2303bf215546Sopenharmony_ci bld.setPosition(insn, false); // make sure bld is init'ed 2304bf215546Sopenharmony_ci // replace sources 2305bf215546Sopenharmony_ci insn->setSrc(2, bld.mkImm(val)); 2306bf215546Sopenharmony_ci insn->setSrc(0, add->getSrc(s)); 2307bf215546Sopenharmony_ci} 2308bf215546Sopenharmony_ci 2309bf215546Sopenharmony_ci// NEG(AND(SET, 1)) -> SET 2310bf215546Sopenharmony_civoid 2311bf215546Sopenharmony_ciAlgebraicOpt::handleNEG(Instruction *i) { 2312bf215546Sopenharmony_ci Instruction *src = i->getSrc(0)->getInsn(); 2313bf215546Sopenharmony_ci ImmediateValue imm; 2314bf215546Sopenharmony_ci int b; 2315bf215546Sopenharmony_ci 2316bf215546Sopenharmony_ci if (isFloatType(i->sType) || !src || src->op != OP_AND) 2317bf215546Sopenharmony_ci return; 2318bf215546Sopenharmony_ci 2319bf215546Sopenharmony_ci if (src->src(0).getImmediate(imm)) 2320bf215546Sopenharmony_ci b = 1; 2321bf215546Sopenharmony_ci else if (src->src(1).getImmediate(imm)) 2322bf215546Sopenharmony_ci b = 0; 2323bf215546Sopenharmony_ci else 2324bf215546Sopenharmony_ci return; 2325bf215546Sopenharmony_ci 2326bf215546Sopenharmony_ci if (!imm.isInteger(1)) 2327bf215546Sopenharmony_ci return; 2328bf215546Sopenharmony_ci 2329bf215546Sopenharmony_ci Instruction *set = src->getSrc(b)->getInsn(); 2330bf215546Sopenharmony_ci if ((set->op == OP_SET || set->op == OP_SET_AND || 2331bf215546Sopenharmony_ci set->op == OP_SET_OR || set->op == OP_SET_XOR) && 2332bf215546Sopenharmony_ci !isFloatType(set->dType)) { 2333bf215546Sopenharmony_ci i->def(0).replace(set->getDef(0), false); 2334bf215546Sopenharmony_ci } 2335bf215546Sopenharmony_ci} 2336bf215546Sopenharmony_ci 2337bf215546Sopenharmony_ci// EXTBF(RDSV(COMBINED_TID)) -> RDSV(TID) 2338bf215546Sopenharmony_civoid 2339bf215546Sopenharmony_ciAlgebraicOpt::handleEXTBF_RDSV(Instruction *i) 2340bf215546Sopenharmony_ci{ 2341bf215546Sopenharmony_ci Instruction *rdsv = i->getSrc(0)->getUniqueInsn(); 2342bf215546Sopenharmony_ci if (rdsv->op != OP_RDSV || 2343bf215546Sopenharmony_ci rdsv->getSrc(0)->asSym()->reg.data.sv.sv != SV_COMBINED_TID) 2344bf215546Sopenharmony_ci return; 2345bf215546Sopenharmony_ci // Avoid creating more RDSV instructions 2346bf215546Sopenharmony_ci if (rdsv->getDef(0)->refCount() > 1) 2347bf215546Sopenharmony_ci return; 2348bf215546Sopenharmony_ci 2349bf215546Sopenharmony_ci ImmediateValue imm; 2350bf215546Sopenharmony_ci if (!i->src(1).getImmediate(imm)) 2351bf215546Sopenharmony_ci return; 2352bf215546Sopenharmony_ci 2353bf215546Sopenharmony_ci int index; 2354bf215546Sopenharmony_ci if (imm.isInteger(0x1000)) 2355bf215546Sopenharmony_ci index = 0; 2356bf215546Sopenharmony_ci else 2357bf215546Sopenharmony_ci if (imm.isInteger(0x0a10)) 2358bf215546Sopenharmony_ci index = 1; 2359bf215546Sopenharmony_ci else 2360bf215546Sopenharmony_ci if (imm.isInteger(0x061a)) 2361bf215546Sopenharmony_ci index = 2; 2362bf215546Sopenharmony_ci else 2363bf215546Sopenharmony_ci return; 2364bf215546Sopenharmony_ci 2365bf215546Sopenharmony_ci bld.setPosition(i, false); 2366bf215546Sopenharmony_ci 2367bf215546Sopenharmony_ci i->op = OP_RDSV; 2368bf215546Sopenharmony_ci i->setSrc(0, bld.mkSysVal(SV_TID, index)); 2369bf215546Sopenharmony_ci i->setSrc(1, NULL); 2370bf215546Sopenharmony_ci} 2371bf215546Sopenharmony_ci 2372bf215546Sopenharmony_cibool 2373bf215546Sopenharmony_ciAlgebraicOpt::visit(BasicBlock *bb) 2374bf215546Sopenharmony_ci{ 2375bf215546Sopenharmony_ci Instruction *next; 2376bf215546Sopenharmony_ci for (Instruction *i = bb->getEntry(); i; i = next) { 2377bf215546Sopenharmony_ci next = i->next; 2378bf215546Sopenharmony_ci switch (i->op) { 2379bf215546Sopenharmony_ci case OP_ABS: 2380bf215546Sopenharmony_ci handleABS(i); 2381bf215546Sopenharmony_ci break; 2382bf215546Sopenharmony_ci case OP_ADD: 2383bf215546Sopenharmony_ci handleADD(i); 2384bf215546Sopenharmony_ci break; 2385bf215546Sopenharmony_ci case OP_RCP: 2386bf215546Sopenharmony_ci handleRCP(i); 2387bf215546Sopenharmony_ci break; 2388bf215546Sopenharmony_ci case OP_MIN: 2389bf215546Sopenharmony_ci case OP_MAX: 2390bf215546Sopenharmony_ci handleMINMAX(i); 2391bf215546Sopenharmony_ci break; 2392bf215546Sopenharmony_ci case OP_SLCT: 2393bf215546Sopenharmony_ci handleSLCT(i); 2394bf215546Sopenharmony_ci break; 2395bf215546Sopenharmony_ci case OP_AND: 2396bf215546Sopenharmony_ci case OP_OR: 2397bf215546Sopenharmony_ci case OP_XOR: 2398bf215546Sopenharmony_ci handleLOGOP(i); 2399bf215546Sopenharmony_ci break; 2400bf215546Sopenharmony_ci case OP_CVT: 2401bf215546Sopenharmony_ci handleCVT_NEG(i); 2402bf215546Sopenharmony_ci handleCVT_CVT(i); 2403bf215546Sopenharmony_ci if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32)) 2404bf215546Sopenharmony_ci handleCVT_EXTBF(i); 2405bf215546Sopenharmony_ci break; 2406bf215546Sopenharmony_ci case OP_SUCLAMP: 2407bf215546Sopenharmony_ci handleSUCLAMP(i); 2408bf215546Sopenharmony_ci break; 2409bf215546Sopenharmony_ci case OP_NEG: 2410bf215546Sopenharmony_ci handleNEG(i); 2411bf215546Sopenharmony_ci break; 2412bf215546Sopenharmony_ci case OP_EXTBF: 2413bf215546Sopenharmony_ci handleEXTBF_RDSV(i); 2414bf215546Sopenharmony_ci break; 2415bf215546Sopenharmony_ci default: 2416bf215546Sopenharmony_ci break; 2417bf215546Sopenharmony_ci } 2418bf215546Sopenharmony_ci } 2419bf215546Sopenharmony_ci 2420bf215546Sopenharmony_ci return true; 2421bf215546Sopenharmony_ci} 2422bf215546Sopenharmony_ci 2423bf215546Sopenharmony_ci// ============================================================================= 2424bf215546Sopenharmony_ci 2425bf215546Sopenharmony_ci// ADD(SHL(a, b), c) -> SHLADD(a, b, c) 2426bf215546Sopenharmony_ci// MUL(a, b) -> a few XMADs 2427bf215546Sopenharmony_ci// MAD/FMA(a, b, c) -> a few XMADs 2428bf215546Sopenharmony_ciclass LateAlgebraicOpt : public Pass 2429bf215546Sopenharmony_ci{ 2430bf215546Sopenharmony_ciprivate: 2431bf215546Sopenharmony_ci virtual bool visit(Instruction *); 2432bf215546Sopenharmony_ci 2433bf215546Sopenharmony_ci void handleADD(Instruction *); 2434bf215546Sopenharmony_ci void handleMULMAD(Instruction *); 2435bf215546Sopenharmony_ci bool tryADDToSHLADD(Instruction *); 2436bf215546Sopenharmony_ci 2437bf215546Sopenharmony_ci BuildUtil bld; 2438bf215546Sopenharmony_ci}; 2439bf215546Sopenharmony_ci 2440bf215546Sopenharmony_civoid 2441bf215546Sopenharmony_ciLateAlgebraicOpt::handleADD(Instruction *add) 2442bf215546Sopenharmony_ci{ 2443bf215546Sopenharmony_ci Value *src0 = add->getSrc(0); 2444bf215546Sopenharmony_ci Value *src1 = add->getSrc(1); 2445bf215546Sopenharmony_ci 2446bf215546Sopenharmony_ci if (src0->reg.file != FILE_GPR || src1->reg.file != FILE_GPR) 2447bf215546Sopenharmony_ci return; 2448bf215546Sopenharmony_ci 2449bf215546Sopenharmony_ci if (prog->getTarget()->isOpSupported(OP_SHLADD, add->dType)) 2450bf215546Sopenharmony_ci tryADDToSHLADD(add); 2451bf215546Sopenharmony_ci} 2452bf215546Sopenharmony_ci 2453bf215546Sopenharmony_ci// ADD(SHL(a, b), c) -> SHLADD(a, b, c) 2454bf215546Sopenharmony_cibool 2455bf215546Sopenharmony_ciLateAlgebraicOpt::tryADDToSHLADD(Instruction *add) 2456bf215546Sopenharmony_ci{ 2457bf215546Sopenharmony_ci Value *src0 = add->getSrc(0); 2458bf215546Sopenharmony_ci Value *src1 = add->getSrc(1); 2459bf215546Sopenharmony_ci ImmediateValue imm; 2460bf215546Sopenharmony_ci Instruction *shl; 2461bf215546Sopenharmony_ci Value *src; 2462bf215546Sopenharmony_ci int s; 2463bf215546Sopenharmony_ci 2464bf215546Sopenharmony_ci if (add->saturate || add->usesFlags() || typeSizeof(add->dType) == 8 2465bf215546Sopenharmony_ci || isFloatType(add->dType)) 2466bf215546Sopenharmony_ci return false; 2467bf215546Sopenharmony_ci 2468bf215546Sopenharmony_ci if (src0->getUniqueInsn() && src0->getUniqueInsn()->op == OP_SHL) 2469bf215546Sopenharmony_ci s = 0; 2470bf215546Sopenharmony_ci else 2471bf215546Sopenharmony_ci if (src1->getUniqueInsn() && src1->getUniqueInsn()->op == OP_SHL) 2472bf215546Sopenharmony_ci s = 1; 2473bf215546Sopenharmony_ci else 2474bf215546Sopenharmony_ci return false; 2475bf215546Sopenharmony_ci 2476bf215546Sopenharmony_ci src = add->getSrc(s); 2477bf215546Sopenharmony_ci shl = src->getUniqueInsn(); 2478bf215546Sopenharmony_ci 2479bf215546Sopenharmony_ci if (shl->bb != add->bb || shl->usesFlags() || shl->subOp || shl->src(0).mod) 2480bf215546Sopenharmony_ci return false; 2481bf215546Sopenharmony_ci 2482bf215546Sopenharmony_ci if (!shl->src(1).getImmediate(imm)) 2483bf215546Sopenharmony_ci return false; 2484bf215546Sopenharmony_ci 2485bf215546Sopenharmony_ci add->op = OP_SHLADD; 2486bf215546Sopenharmony_ci add->setSrc(2, add->src(!s)); 2487bf215546Sopenharmony_ci // SHL can't have any modifiers, but the ADD source may have had 2488bf215546Sopenharmony_ci // one. Preserve it. 2489bf215546Sopenharmony_ci add->setSrc(0, shl->getSrc(0)); 2490bf215546Sopenharmony_ci if (s == 1) 2491bf215546Sopenharmony_ci add->src(0).mod = add->src(1).mod; 2492bf215546Sopenharmony_ci add->setSrc(1, new_ImmediateValue(shl->bb->getProgram(), imm.reg.data.u32)); 2493bf215546Sopenharmony_ci add->src(1).mod = Modifier(0); 2494bf215546Sopenharmony_ci 2495bf215546Sopenharmony_ci return true; 2496bf215546Sopenharmony_ci} 2497bf215546Sopenharmony_ci 2498bf215546Sopenharmony_ci// MUL(a, b) -> a few XMADs 2499bf215546Sopenharmony_ci// MAD/FMA(a, b, c) -> a few XMADs 2500bf215546Sopenharmony_civoid 2501bf215546Sopenharmony_ciLateAlgebraicOpt::handleMULMAD(Instruction *i) 2502bf215546Sopenharmony_ci{ 2503bf215546Sopenharmony_ci // TODO: handle NV50_IR_SUBOP_MUL_HIGH 2504bf215546Sopenharmony_ci if (!prog->getTarget()->isOpSupported(OP_XMAD, TYPE_U32)) 2505bf215546Sopenharmony_ci return; 2506bf215546Sopenharmony_ci if (isFloatType(i->dType) || typeSizeof(i->dType) != 4) 2507bf215546Sopenharmony_ci return; 2508bf215546Sopenharmony_ci if (i->subOp || i->usesFlags() || i->flagsDef >= 0) 2509bf215546Sopenharmony_ci return; 2510bf215546Sopenharmony_ci 2511bf215546Sopenharmony_ci assert(!i->src(0).mod); 2512bf215546Sopenharmony_ci assert(!i->src(1).mod); 2513bf215546Sopenharmony_ci assert(i->op == OP_MUL ? 1 : !i->src(2).mod); 2514bf215546Sopenharmony_ci 2515bf215546Sopenharmony_ci bld.setPosition(i, false); 2516bf215546Sopenharmony_ci 2517bf215546Sopenharmony_ci Value *a = i->getSrc(0); 2518bf215546Sopenharmony_ci Value *b = i->getSrc(1); 2519bf215546Sopenharmony_ci Value *c = i->op == OP_MUL ? bld.mkImm(0) : i->getSrc(2); 2520bf215546Sopenharmony_ci 2521bf215546Sopenharmony_ci Value *tmp0 = bld.getSSA(); 2522bf215546Sopenharmony_ci Value *tmp1 = bld.getSSA(); 2523bf215546Sopenharmony_ci 2524bf215546Sopenharmony_ci Instruction *insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp0, b, a, c); 2525bf215546Sopenharmony_ci insn->setPredicate(i->cc, i->getPredicate()); 2526bf215546Sopenharmony_ci 2527bf215546Sopenharmony_ci insn = bld.mkOp3(OP_XMAD, TYPE_U32, tmp1, b, a, bld.mkImm(0)); 2528bf215546Sopenharmony_ci insn->setPredicate(i->cc, i->getPredicate()); 2529bf215546Sopenharmony_ci insn->subOp = NV50_IR_SUBOP_XMAD_MRG | NV50_IR_SUBOP_XMAD_H1(1); 2530bf215546Sopenharmony_ci 2531bf215546Sopenharmony_ci Value *pred = i->getPredicate(); 2532bf215546Sopenharmony_ci i->setPredicate(i->cc, NULL); 2533bf215546Sopenharmony_ci 2534bf215546Sopenharmony_ci i->op = OP_XMAD; 2535bf215546Sopenharmony_ci i->setSrc(0, b); 2536bf215546Sopenharmony_ci i->setSrc(1, tmp1); 2537bf215546Sopenharmony_ci i->setSrc(2, tmp0); 2538bf215546Sopenharmony_ci i->subOp = NV50_IR_SUBOP_XMAD_PSL | NV50_IR_SUBOP_XMAD_CBCC; 2539bf215546Sopenharmony_ci i->subOp |= NV50_IR_SUBOP_XMAD_H1(0) | NV50_IR_SUBOP_XMAD_H1(1); 2540bf215546Sopenharmony_ci 2541bf215546Sopenharmony_ci i->setPredicate(i->cc, pred); 2542bf215546Sopenharmony_ci} 2543bf215546Sopenharmony_ci 2544bf215546Sopenharmony_cibool 2545bf215546Sopenharmony_ciLateAlgebraicOpt::visit(Instruction *i) 2546bf215546Sopenharmony_ci{ 2547bf215546Sopenharmony_ci switch (i->op) { 2548bf215546Sopenharmony_ci case OP_ADD: 2549bf215546Sopenharmony_ci handleADD(i); 2550bf215546Sopenharmony_ci break; 2551bf215546Sopenharmony_ci case OP_MUL: 2552bf215546Sopenharmony_ci case OP_MAD: 2553bf215546Sopenharmony_ci case OP_FMA: 2554bf215546Sopenharmony_ci handleMULMAD(i); 2555bf215546Sopenharmony_ci break; 2556bf215546Sopenharmony_ci default: 2557bf215546Sopenharmony_ci break; 2558bf215546Sopenharmony_ci } 2559bf215546Sopenharmony_ci 2560bf215546Sopenharmony_ci return true; 2561bf215546Sopenharmony_ci} 2562bf215546Sopenharmony_ci 2563bf215546Sopenharmony_ci// ============================================================================= 2564bf215546Sopenharmony_ci 2565bf215546Sopenharmony_ci// Split 64-bit MUL and MAD 2566bf215546Sopenharmony_ciclass Split64BitOpPreRA : public Pass 2567bf215546Sopenharmony_ci{ 2568bf215546Sopenharmony_ciprivate: 2569bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 2570bf215546Sopenharmony_ci void split64MulMad(Function *, Instruction *, DataType); 2571bf215546Sopenharmony_ci 2572bf215546Sopenharmony_ci BuildUtil bld; 2573bf215546Sopenharmony_ci}; 2574bf215546Sopenharmony_ci 2575bf215546Sopenharmony_cibool 2576bf215546Sopenharmony_ciSplit64BitOpPreRA::visit(BasicBlock *bb) 2577bf215546Sopenharmony_ci{ 2578bf215546Sopenharmony_ci Instruction *i, *next; 2579bf215546Sopenharmony_ci Modifier mod; 2580bf215546Sopenharmony_ci 2581bf215546Sopenharmony_ci for (i = bb->getEntry(); i; i = next) { 2582bf215546Sopenharmony_ci next = i->next; 2583bf215546Sopenharmony_ci 2584bf215546Sopenharmony_ci DataType hTy; 2585bf215546Sopenharmony_ci switch (i->dType) { 2586bf215546Sopenharmony_ci case TYPE_U64: hTy = TYPE_U32; break; 2587bf215546Sopenharmony_ci case TYPE_S64: hTy = TYPE_S32; break; 2588bf215546Sopenharmony_ci default: 2589bf215546Sopenharmony_ci continue; 2590bf215546Sopenharmony_ci } 2591bf215546Sopenharmony_ci 2592bf215546Sopenharmony_ci if (i->op == OP_MAD || i->op == OP_MUL) 2593bf215546Sopenharmony_ci split64MulMad(func, i, hTy); 2594bf215546Sopenharmony_ci } 2595bf215546Sopenharmony_ci 2596bf215546Sopenharmony_ci return true; 2597bf215546Sopenharmony_ci} 2598bf215546Sopenharmony_ci 2599bf215546Sopenharmony_civoid 2600bf215546Sopenharmony_ciSplit64BitOpPreRA::split64MulMad(Function *fn, Instruction *i, DataType hTy) 2601bf215546Sopenharmony_ci{ 2602bf215546Sopenharmony_ci assert(i->op == OP_MAD || i->op == OP_MUL); 2603bf215546Sopenharmony_ci assert(!isFloatType(i->dType) && !isFloatType(i->sType)); 2604bf215546Sopenharmony_ci assert(typeSizeof(hTy) == 4); 2605bf215546Sopenharmony_ci 2606bf215546Sopenharmony_ci bld.setPosition(i, true); 2607bf215546Sopenharmony_ci 2608bf215546Sopenharmony_ci Value *zero = bld.mkImm(0u); 2609bf215546Sopenharmony_ci Value *carry = bld.getSSA(1, FILE_FLAGS); 2610bf215546Sopenharmony_ci 2611bf215546Sopenharmony_ci // We want to compute `d = a * b (+ c)?`, where a, b, c and d are 64-bit 2612bf215546Sopenharmony_ci // values (a, b and c might be 32-bit values), using 32-bit operations. This 2613bf215546Sopenharmony_ci // gives the following operations: 2614bf215546Sopenharmony_ci // * `d.low = low(a.low * b.low) (+ c.low)?` 2615bf215546Sopenharmony_ci // * `d.high = low(a.high * b.low) + low(a.low * b.high) 2616bf215546Sopenharmony_ci // + high(a.low * b.low) (+ c.high)?` 2617bf215546Sopenharmony_ci // 2618bf215546Sopenharmony_ci // To compute the high bits, we can split in the following operations: 2619bf215546Sopenharmony_ci // * `tmp1 = low(a.high * b.low) (+ c.high)?` 2620bf215546Sopenharmony_ci // * `tmp2 = low(a.low * b.high) + tmp1` 2621bf215546Sopenharmony_ci // * `d.high = high(a.low * b.low) + tmp2` 2622bf215546Sopenharmony_ci // 2623bf215546Sopenharmony_ci // mkSplit put lower bits at index 0 and higher bits at index 1 2624bf215546Sopenharmony_ci 2625bf215546Sopenharmony_ci Value *op1[2]; 2626bf215546Sopenharmony_ci if (i->getSrc(0)->reg.size == 8) 2627bf215546Sopenharmony_ci bld.mkSplit(op1, 4, i->getSrc(0)); 2628bf215546Sopenharmony_ci else { 2629bf215546Sopenharmony_ci op1[0] = i->getSrc(0); 2630bf215546Sopenharmony_ci op1[1] = zero; 2631bf215546Sopenharmony_ci } 2632bf215546Sopenharmony_ci Value *op2[2]; 2633bf215546Sopenharmony_ci if (i->getSrc(1)->reg.size == 8) 2634bf215546Sopenharmony_ci bld.mkSplit(op2, 4, i->getSrc(1)); 2635bf215546Sopenharmony_ci else { 2636bf215546Sopenharmony_ci op2[0] = i->getSrc(1); 2637bf215546Sopenharmony_ci op2[1] = zero; 2638bf215546Sopenharmony_ci } 2639bf215546Sopenharmony_ci 2640bf215546Sopenharmony_ci Value *op3[2] = { NULL, NULL }; 2641bf215546Sopenharmony_ci if (i->op == OP_MAD) { 2642bf215546Sopenharmony_ci if (i->getSrc(2)->reg.size == 8) 2643bf215546Sopenharmony_ci bld.mkSplit(op3, 4, i->getSrc(2)); 2644bf215546Sopenharmony_ci else { 2645bf215546Sopenharmony_ci op3[0] = i->getSrc(2); 2646bf215546Sopenharmony_ci op3[1] = zero; 2647bf215546Sopenharmony_ci } 2648bf215546Sopenharmony_ci } 2649bf215546Sopenharmony_ci 2650bf215546Sopenharmony_ci Value *tmpRes1Hi = bld.getSSA(); 2651bf215546Sopenharmony_ci if (i->op == OP_MAD) 2652bf215546Sopenharmony_ci bld.mkOp3(OP_MAD, hTy, tmpRes1Hi, op1[1], op2[0], op3[1]); 2653bf215546Sopenharmony_ci else 2654bf215546Sopenharmony_ci bld.mkOp2(OP_MUL, hTy, tmpRes1Hi, op1[1], op2[0]); 2655bf215546Sopenharmony_ci 2656bf215546Sopenharmony_ci Value *tmpRes2Hi = bld.mkOp3v(OP_MAD, hTy, bld.getSSA(), op1[0], op2[1], tmpRes1Hi); 2657bf215546Sopenharmony_ci 2658bf215546Sopenharmony_ci Value *def[2] = { bld.getSSA(), bld.getSSA() }; 2659bf215546Sopenharmony_ci 2660bf215546Sopenharmony_ci // If it was a MAD, add the carry from the low bits 2661bf215546Sopenharmony_ci // It is not needed if it was a MUL, since we added high(a.low * b.low) to 2662bf215546Sopenharmony_ci // d.high 2663bf215546Sopenharmony_ci if (i->op == OP_MAD) 2664bf215546Sopenharmony_ci bld.mkOp3(OP_MAD, hTy, def[0], op1[0], op2[0], op3[0])->setFlagsDef(1, carry); 2665bf215546Sopenharmony_ci else 2666bf215546Sopenharmony_ci bld.mkOp2(OP_MUL, hTy, def[0], op1[0], op2[0]); 2667bf215546Sopenharmony_ci 2668bf215546Sopenharmony_ci Instruction *hiPart3 = bld.mkOp3(OP_MAD, hTy, def[1], op1[0], op2[0], tmpRes2Hi); 2669bf215546Sopenharmony_ci hiPart3->subOp = NV50_IR_SUBOP_MUL_HIGH; 2670bf215546Sopenharmony_ci if (i->op == OP_MAD) 2671bf215546Sopenharmony_ci hiPart3->setFlagsSrc(3, carry); 2672bf215546Sopenharmony_ci 2673bf215546Sopenharmony_ci bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]); 2674bf215546Sopenharmony_ci 2675bf215546Sopenharmony_ci delete_Instruction(fn->getProgram(), i); 2676bf215546Sopenharmony_ci} 2677bf215546Sopenharmony_ci 2678bf215546Sopenharmony_ci// ============================================================================= 2679bf215546Sopenharmony_ci 2680bf215546Sopenharmony_cistatic inline void 2681bf215546Sopenharmony_ciupdateLdStOffset(Instruction *ldst, int32_t offset, Function *fn) 2682bf215546Sopenharmony_ci{ 2683bf215546Sopenharmony_ci if (offset != ldst->getSrc(0)->reg.data.offset) { 2684bf215546Sopenharmony_ci if (ldst->getSrc(0)->refCount() > 1) 2685bf215546Sopenharmony_ci ldst->setSrc(0, cloneShallow(fn, ldst->getSrc(0))); 2686bf215546Sopenharmony_ci ldst->getSrc(0)->reg.data.offset = offset; 2687bf215546Sopenharmony_ci } 2688bf215546Sopenharmony_ci} 2689bf215546Sopenharmony_ci 2690bf215546Sopenharmony_ci// Combine loads and stores, forward stores to loads where possible. 2691bf215546Sopenharmony_ciclass MemoryOpt : public Pass 2692bf215546Sopenharmony_ci{ 2693bf215546Sopenharmony_ciprivate: 2694bf215546Sopenharmony_ci class Record 2695bf215546Sopenharmony_ci { 2696bf215546Sopenharmony_ci public: 2697bf215546Sopenharmony_ci Record *next; 2698bf215546Sopenharmony_ci Instruction *insn; 2699bf215546Sopenharmony_ci const Value *rel[2]; 2700bf215546Sopenharmony_ci const Value *base; 2701bf215546Sopenharmony_ci int32_t offset; 2702bf215546Sopenharmony_ci int8_t fileIndex; 2703bf215546Sopenharmony_ci uint8_t size; 2704bf215546Sopenharmony_ci bool locked; 2705bf215546Sopenharmony_ci Record *prev; 2706bf215546Sopenharmony_ci 2707bf215546Sopenharmony_ci bool overlaps(const Instruction *ldst) const; 2708bf215546Sopenharmony_ci 2709bf215546Sopenharmony_ci inline void link(Record **); 2710bf215546Sopenharmony_ci inline void unlink(Record **); 2711bf215546Sopenharmony_ci inline void set(const Instruction *ldst); 2712bf215546Sopenharmony_ci }; 2713bf215546Sopenharmony_ci 2714bf215546Sopenharmony_cipublic: 2715bf215546Sopenharmony_ci MemoryOpt(); 2716bf215546Sopenharmony_ci 2717bf215546Sopenharmony_ci Record *loads[DATA_FILE_COUNT]; 2718bf215546Sopenharmony_ci Record *stores[DATA_FILE_COUNT]; 2719bf215546Sopenharmony_ci 2720bf215546Sopenharmony_ci MemoryPool recordPool; 2721bf215546Sopenharmony_ci 2722bf215546Sopenharmony_ciprivate: 2723bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 2724bf215546Sopenharmony_ci bool runOpt(BasicBlock *); 2725bf215546Sopenharmony_ci 2726bf215546Sopenharmony_ci Record **getList(const Instruction *); 2727bf215546Sopenharmony_ci 2728bf215546Sopenharmony_ci Record *findRecord(const Instruction *, bool load, bool& isAdjacent) const; 2729bf215546Sopenharmony_ci 2730bf215546Sopenharmony_ci // merge @insn into load/store instruction from @rec 2731bf215546Sopenharmony_ci bool combineLd(Record *rec, Instruction *ld); 2732bf215546Sopenharmony_ci bool combineSt(Record *rec, Instruction *st); 2733bf215546Sopenharmony_ci 2734bf215546Sopenharmony_ci bool replaceLdFromLd(Instruction *ld, Record *ldRec); 2735bf215546Sopenharmony_ci bool replaceLdFromSt(Instruction *ld, Record *stRec); 2736bf215546Sopenharmony_ci bool replaceStFromSt(Instruction *restrict st, Record *stRec); 2737bf215546Sopenharmony_ci 2738bf215546Sopenharmony_ci void addRecord(Instruction *ldst); 2739bf215546Sopenharmony_ci void purgeRecords(Instruction *const st, DataFile); 2740bf215546Sopenharmony_ci void lockStores(Instruction *const ld); 2741bf215546Sopenharmony_ci void reset(); 2742bf215546Sopenharmony_ci 2743bf215546Sopenharmony_ciprivate: 2744bf215546Sopenharmony_ci Record *prevRecord; 2745bf215546Sopenharmony_ci}; 2746bf215546Sopenharmony_ci 2747bf215546Sopenharmony_ciMemoryOpt::MemoryOpt() : recordPool(sizeof(MemoryOpt::Record), 6) 2748bf215546Sopenharmony_ci{ 2749bf215546Sopenharmony_ci for (int i = 0; i < DATA_FILE_COUNT; ++i) { 2750bf215546Sopenharmony_ci loads[i] = NULL; 2751bf215546Sopenharmony_ci stores[i] = NULL; 2752bf215546Sopenharmony_ci } 2753bf215546Sopenharmony_ci prevRecord = NULL; 2754bf215546Sopenharmony_ci} 2755bf215546Sopenharmony_ci 2756bf215546Sopenharmony_civoid 2757bf215546Sopenharmony_ciMemoryOpt::reset() 2758bf215546Sopenharmony_ci{ 2759bf215546Sopenharmony_ci for (unsigned int i = 0; i < DATA_FILE_COUNT; ++i) { 2760bf215546Sopenharmony_ci Record *it, *next; 2761bf215546Sopenharmony_ci for (it = loads[i]; it; it = next) { 2762bf215546Sopenharmony_ci next = it->next; 2763bf215546Sopenharmony_ci recordPool.release(it); 2764bf215546Sopenharmony_ci } 2765bf215546Sopenharmony_ci loads[i] = NULL; 2766bf215546Sopenharmony_ci for (it = stores[i]; it; it = next) { 2767bf215546Sopenharmony_ci next = it->next; 2768bf215546Sopenharmony_ci recordPool.release(it); 2769bf215546Sopenharmony_ci } 2770bf215546Sopenharmony_ci stores[i] = NULL; 2771bf215546Sopenharmony_ci } 2772bf215546Sopenharmony_ci} 2773bf215546Sopenharmony_ci 2774bf215546Sopenharmony_cibool 2775bf215546Sopenharmony_ciMemoryOpt::combineLd(Record *rec, Instruction *ld) 2776bf215546Sopenharmony_ci{ 2777bf215546Sopenharmony_ci int32_t offRc = rec->offset; 2778bf215546Sopenharmony_ci int32_t offLd = ld->getSrc(0)->reg.data.offset; 2779bf215546Sopenharmony_ci int sizeRc = rec->size; 2780bf215546Sopenharmony_ci int sizeLd = typeSizeof(ld->dType); 2781bf215546Sopenharmony_ci int size = sizeRc + sizeLd; 2782bf215546Sopenharmony_ci int d, j; 2783bf215546Sopenharmony_ci 2784bf215546Sopenharmony_ci if (!prog->getTarget()-> 2785bf215546Sopenharmony_ci isAccessSupported(ld->getSrc(0)->reg.file, typeOfSize(size))) 2786bf215546Sopenharmony_ci return false; 2787bf215546Sopenharmony_ci // no unaligned loads 2788bf215546Sopenharmony_ci if (((size == 0x8) && (MIN2(offLd, offRc) & 0x7)) || 2789bf215546Sopenharmony_ci ((size == 0xc) && (MIN2(offLd, offRc) & 0xf))) 2790bf215546Sopenharmony_ci return false; 2791bf215546Sopenharmony_ci // for compute indirect loads are not guaranteed to be aligned 2792bf215546Sopenharmony_ci if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0]) 2793bf215546Sopenharmony_ci return false; 2794bf215546Sopenharmony_ci 2795bf215546Sopenharmony_ci assert(sizeRc + sizeLd <= 16 && offRc != offLd); 2796bf215546Sopenharmony_ci 2797bf215546Sopenharmony_ci // lock any stores that overlap with the load being merged into the 2798bf215546Sopenharmony_ci // existing record. 2799bf215546Sopenharmony_ci lockStores(ld); 2800bf215546Sopenharmony_ci 2801bf215546Sopenharmony_ci for (j = 0; sizeRc; sizeRc -= rec->insn->getDef(j)->reg.size, ++j); 2802bf215546Sopenharmony_ci 2803bf215546Sopenharmony_ci if (offLd < offRc) { 2804bf215546Sopenharmony_ci int sz; 2805bf215546Sopenharmony_ci for (sz = 0, d = 0; sz < sizeLd; sz += ld->getDef(d)->reg.size, ++d); 2806bf215546Sopenharmony_ci // d: nr of definitions in ld 2807bf215546Sopenharmony_ci // j: nr of definitions in rec->insn, move: 2808bf215546Sopenharmony_ci for (d = d + j - 1; j > 0; --j, --d) 2809bf215546Sopenharmony_ci rec->insn->setDef(d, rec->insn->getDef(j - 1)); 2810bf215546Sopenharmony_ci 2811bf215546Sopenharmony_ci if (rec->insn->getSrc(0)->refCount() > 1) 2812bf215546Sopenharmony_ci rec->insn->setSrc(0, cloneShallow(func, rec->insn->getSrc(0))); 2813bf215546Sopenharmony_ci rec->offset = rec->insn->getSrc(0)->reg.data.offset = offLd; 2814bf215546Sopenharmony_ci 2815bf215546Sopenharmony_ci d = 0; 2816bf215546Sopenharmony_ci } else { 2817bf215546Sopenharmony_ci d = j; 2818bf215546Sopenharmony_ci } 2819bf215546Sopenharmony_ci // move definitions of @ld to @rec->insn 2820bf215546Sopenharmony_ci for (j = 0; sizeLd; ++j, ++d) { 2821bf215546Sopenharmony_ci sizeLd -= ld->getDef(j)->reg.size; 2822bf215546Sopenharmony_ci rec->insn->setDef(d, ld->getDef(j)); 2823bf215546Sopenharmony_ci } 2824bf215546Sopenharmony_ci 2825bf215546Sopenharmony_ci rec->size = size; 2826bf215546Sopenharmony_ci rec->insn->getSrc(0)->reg.size = size; 2827bf215546Sopenharmony_ci rec->insn->setType(typeOfSize(size)); 2828bf215546Sopenharmony_ci 2829bf215546Sopenharmony_ci delete_Instruction(prog, ld); 2830bf215546Sopenharmony_ci 2831bf215546Sopenharmony_ci return true; 2832bf215546Sopenharmony_ci} 2833bf215546Sopenharmony_ci 2834bf215546Sopenharmony_cibool 2835bf215546Sopenharmony_ciMemoryOpt::combineSt(Record *rec, Instruction *st) 2836bf215546Sopenharmony_ci{ 2837bf215546Sopenharmony_ci int32_t offRc = rec->offset; 2838bf215546Sopenharmony_ci int32_t offSt = st->getSrc(0)->reg.data.offset; 2839bf215546Sopenharmony_ci int sizeRc = rec->size; 2840bf215546Sopenharmony_ci int sizeSt = typeSizeof(st->dType); 2841bf215546Sopenharmony_ci int s = sizeSt / 4; 2842bf215546Sopenharmony_ci int size = sizeRc + sizeSt; 2843bf215546Sopenharmony_ci int j, k; 2844bf215546Sopenharmony_ci Value *src[4]; // no modifiers in ValueRef allowed for st 2845bf215546Sopenharmony_ci Value *extra[3]; 2846bf215546Sopenharmony_ci 2847bf215546Sopenharmony_ci if (!prog->getTarget()-> 2848bf215546Sopenharmony_ci isAccessSupported(st->getSrc(0)->reg.file, typeOfSize(size))) 2849bf215546Sopenharmony_ci return false; 2850bf215546Sopenharmony_ci // no unaligned stores 2851bf215546Sopenharmony_ci if (size == 8 && MIN2(offRc, offSt) & 0x7) 2852bf215546Sopenharmony_ci return false; 2853bf215546Sopenharmony_ci // for compute indirect stores are not guaranteed to be aligned 2854bf215546Sopenharmony_ci if (prog->getType() == Program::TYPE_COMPUTE && rec->rel[0]) 2855bf215546Sopenharmony_ci return false; 2856bf215546Sopenharmony_ci 2857bf215546Sopenharmony_ci // There's really no great place to put this in a generic manner. Seemingly 2858bf215546Sopenharmony_ci // wide stores at 0x60 don't work in GS shaders on SM50+. Don't combine 2859bf215546Sopenharmony_ci // those. 2860bf215546Sopenharmony_ci if (prog->getTarget()->getChipset() >= NVISA_GM107_CHIPSET && 2861bf215546Sopenharmony_ci prog->getType() == Program::TYPE_GEOMETRY && 2862bf215546Sopenharmony_ci st->getSrc(0)->reg.file == FILE_SHADER_OUTPUT && 2863bf215546Sopenharmony_ci rec->rel[0] == NULL && 2864bf215546Sopenharmony_ci MIN2(offRc, offSt) == 0x60) 2865bf215546Sopenharmony_ci return false; 2866bf215546Sopenharmony_ci 2867bf215546Sopenharmony_ci // remove any existing load/store records for the store being merged into 2868bf215546Sopenharmony_ci // the existing record. 2869bf215546Sopenharmony_ci purgeRecords(st, DATA_FILE_COUNT); 2870bf215546Sopenharmony_ci 2871bf215546Sopenharmony_ci st->takeExtraSources(0, extra); // save predicate and indirect address 2872bf215546Sopenharmony_ci 2873bf215546Sopenharmony_ci if (offRc < offSt) { 2874bf215546Sopenharmony_ci // save values from @st 2875bf215546Sopenharmony_ci for (s = 0; sizeSt; ++s) { 2876bf215546Sopenharmony_ci sizeSt -= st->getSrc(s + 1)->reg.size; 2877bf215546Sopenharmony_ci src[s] = st->getSrc(s + 1); 2878bf215546Sopenharmony_ci } 2879bf215546Sopenharmony_ci // set record's values as low sources of @st 2880bf215546Sopenharmony_ci for (j = 1; sizeRc; ++j) { 2881bf215546Sopenharmony_ci sizeRc -= rec->insn->getSrc(j)->reg.size; 2882bf215546Sopenharmony_ci st->setSrc(j, rec->insn->getSrc(j)); 2883bf215546Sopenharmony_ci } 2884bf215546Sopenharmony_ci // set saved values as high sources of @st 2885bf215546Sopenharmony_ci for (k = j, j = 0; j < s; ++j) 2886bf215546Sopenharmony_ci st->setSrc(k++, src[j]); 2887bf215546Sopenharmony_ci 2888bf215546Sopenharmony_ci updateLdStOffset(st, offRc, func); 2889bf215546Sopenharmony_ci } else { 2890bf215546Sopenharmony_ci for (j = 1; sizeSt; ++j) 2891bf215546Sopenharmony_ci sizeSt -= st->getSrc(j)->reg.size; 2892bf215546Sopenharmony_ci for (s = 1; sizeRc; ++j, ++s) { 2893bf215546Sopenharmony_ci sizeRc -= rec->insn->getSrc(s)->reg.size; 2894bf215546Sopenharmony_ci st->setSrc(j, rec->insn->getSrc(s)); 2895bf215546Sopenharmony_ci } 2896bf215546Sopenharmony_ci rec->offset = offSt; 2897bf215546Sopenharmony_ci } 2898bf215546Sopenharmony_ci st->putExtraSources(0, extra); // restore pointer and predicate 2899bf215546Sopenharmony_ci 2900bf215546Sopenharmony_ci delete_Instruction(prog, rec->insn); 2901bf215546Sopenharmony_ci rec->insn = st; 2902bf215546Sopenharmony_ci rec->size = size; 2903bf215546Sopenharmony_ci rec->insn->getSrc(0)->reg.size = size; 2904bf215546Sopenharmony_ci rec->insn->setType(typeOfSize(size)); 2905bf215546Sopenharmony_ci return true; 2906bf215546Sopenharmony_ci} 2907bf215546Sopenharmony_ci 2908bf215546Sopenharmony_civoid 2909bf215546Sopenharmony_ciMemoryOpt::Record::set(const Instruction *ldst) 2910bf215546Sopenharmony_ci{ 2911bf215546Sopenharmony_ci const Symbol *mem = ldst->getSrc(0)->asSym(); 2912bf215546Sopenharmony_ci fileIndex = mem->reg.fileIndex; 2913bf215546Sopenharmony_ci rel[0] = ldst->getIndirect(0, 0); 2914bf215546Sopenharmony_ci rel[1] = ldst->getIndirect(0, 1); 2915bf215546Sopenharmony_ci offset = mem->reg.data.offset; 2916bf215546Sopenharmony_ci base = mem->getBase(); 2917bf215546Sopenharmony_ci size = typeSizeof(ldst->sType); 2918bf215546Sopenharmony_ci} 2919bf215546Sopenharmony_ci 2920bf215546Sopenharmony_civoid 2921bf215546Sopenharmony_ciMemoryOpt::Record::link(Record **list) 2922bf215546Sopenharmony_ci{ 2923bf215546Sopenharmony_ci next = *list; 2924bf215546Sopenharmony_ci if (next) 2925bf215546Sopenharmony_ci next->prev = this; 2926bf215546Sopenharmony_ci prev = NULL; 2927bf215546Sopenharmony_ci *list = this; 2928bf215546Sopenharmony_ci} 2929bf215546Sopenharmony_ci 2930bf215546Sopenharmony_civoid 2931bf215546Sopenharmony_ciMemoryOpt::Record::unlink(Record **list) 2932bf215546Sopenharmony_ci{ 2933bf215546Sopenharmony_ci if (next) 2934bf215546Sopenharmony_ci next->prev = prev; 2935bf215546Sopenharmony_ci if (prev) 2936bf215546Sopenharmony_ci prev->next = next; 2937bf215546Sopenharmony_ci else 2938bf215546Sopenharmony_ci *list = next; 2939bf215546Sopenharmony_ci} 2940bf215546Sopenharmony_ci 2941bf215546Sopenharmony_ciMemoryOpt::Record ** 2942bf215546Sopenharmony_ciMemoryOpt::getList(const Instruction *insn) 2943bf215546Sopenharmony_ci{ 2944bf215546Sopenharmony_ci if (insn->op == OP_LOAD || insn->op == OP_VFETCH) 2945bf215546Sopenharmony_ci return &loads[insn->src(0).getFile()]; 2946bf215546Sopenharmony_ci return &stores[insn->src(0).getFile()]; 2947bf215546Sopenharmony_ci} 2948bf215546Sopenharmony_ci 2949bf215546Sopenharmony_civoid 2950bf215546Sopenharmony_ciMemoryOpt::addRecord(Instruction *i) 2951bf215546Sopenharmony_ci{ 2952bf215546Sopenharmony_ci Record **list = getList(i); 2953bf215546Sopenharmony_ci Record *it = reinterpret_cast<Record *>(recordPool.allocate()); 2954bf215546Sopenharmony_ci 2955bf215546Sopenharmony_ci it->link(list); 2956bf215546Sopenharmony_ci it->set(i); 2957bf215546Sopenharmony_ci it->insn = i; 2958bf215546Sopenharmony_ci it->locked = false; 2959bf215546Sopenharmony_ci} 2960bf215546Sopenharmony_ci 2961bf215546Sopenharmony_ciMemoryOpt::Record * 2962bf215546Sopenharmony_ciMemoryOpt::findRecord(const Instruction *insn, bool load, bool& isAdj) const 2963bf215546Sopenharmony_ci{ 2964bf215546Sopenharmony_ci const Symbol *sym = insn->getSrc(0)->asSym(); 2965bf215546Sopenharmony_ci const int size = typeSizeof(insn->sType); 2966bf215546Sopenharmony_ci Record *rec = NULL; 2967bf215546Sopenharmony_ci Record *it = load ? loads[sym->reg.file] : stores[sym->reg.file]; 2968bf215546Sopenharmony_ci 2969bf215546Sopenharmony_ci for (; it; it = it->next) { 2970bf215546Sopenharmony_ci if (it->locked && insn->op != OP_LOAD && insn->op != OP_VFETCH) 2971bf215546Sopenharmony_ci continue; 2972bf215546Sopenharmony_ci if ((it->offset >> 4) != (sym->reg.data.offset >> 4) || 2973bf215546Sopenharmony_ci it->rel[0] != insn->getIndirect(0, 0) || 2974bf215546Sopenharmony_ci it->fileIndex != sym->reg.fileIndex || 2975bf215546Sopenharmony_ci it->rel[1] != insn->getIndirect(0, 1)) 2976bf215546Sopenharmony_ci continue; 2977bf215546Sopenharmony_ci 2978bf215546Sopenharmony_ci if (it->offset < sym->reg.data.offset) { 2979bf215546Sopenharmony_ci if (it->offset + it->size >= sym->reg.data.offset) { 2980bf215546Sopenharmony_ci isAdj = (it->offset + it->size == sym->reg.data.offset); 2981bf215546Sopenharmony_ci if (!isAdj) 2982bf215546Sopenharmony_ci return it; 2983bf215546Sopenharmony_ci if (!(it->offset & 0x7)) 2984bf215546Sopenharmony_ci rec = it; 2985bf215546Sopenharmony_ci } 2986bf215546Sopenharmony_ci } else { 2987bf215546Sopenharmony_ci isAdj = it->offset != sym->reg.data.offset; 2988bf215546Sopenharmony_ci if (size <= it->size && !isAdj) 2989bf215546Sopenharmony_ci return it; 2990bf215546Sopenharmony_ci else 2991bf215546Sopenharmony_ci if (!(sym->reg.data.offset & 0x7)) 2992bf215546Sopenharmony_ci if (it->offset - size <= sym->reg.data.offset) 2993bf215546Sopenharmony_ci rec = it; 2994bf215546Sopenharmony_ci } 2995bf215546Sopenharmony_ci } 2996bf215546Sopenharmony_ci return rec; 2997bf215546Sopenharmony_ci} 2998bf215546Sopenharmony_ci 2999bf215546Sopenharmony_cibool 3000bf215546Sopenharmony_ciMemoryOpt::replaceLdFromSt(Instruction *ld, Record *rec) 3001bf215546Sopenharmony_ci{ 3002bf215546Sopenharmony_ci Instruction *st = rec->insn; 3003bf215546Sopenharmony_ci int32_t offSt = rec->offset; 3004bf215546Sopenharmony_ci int32_t offLd = ld->getSrc(0)->reg.data.offset; 3005bf215546Sopenharmony_ci int d, s; 3006bf215546Sopenharmony_ci 3007bf215546Sopenharmony_ci for (s = 1; offSt != offLd && st->srcExists(s); ++s) 3008bf215546Sopenharmony_ci offSt += st->getSrc(s)->reg.size; 3009bf215546Sopenharmony_ci if (offSt != offLd) 3010bf215546Sopenharmony_ci return false; 3011bf215546Sopenharmony_ci 3012bf215546Sopenharmony_ci for (d = 0; ld->defExists(d) && st->srcExists(s); ++d, ++s) { 3013bf215546Sopenharmony_ci if (ld->getDef(d)->reg.size != st->getSrc(s)->reg.size) 3014bf215546Sopenharmony_ci return false; 3015bf215546Sopenharmony_ci if (st->getSrc(s)->reg.file != FILE_GPR) 3016bf215546Sopenharmony_ci return false; 3017bf215546Sopenharmony_ci ld->def(d).replace(st->src(s), false); 3018bf215546Sopenharmony_ci } 3019bf215546Sopenharmony_ci ld->bb->remove(ld); 3020bf215546Sopenharmony_ci return true; 3021bf215546Sopenharmony_ci} 3022bf215546Sopenharmony_ci 3023bf215546Sopenharmony_cibool 3024bf215546Sopenharmony_ciMemoryOpt::replaceLdFromLd(Instruction *ldE, Record *rec) 3025bf215546Sopenharmony_ci{ 3026bf215546Sopenharmony_ci Instruction *ldR = rec->insn; 3027bf215546Sopenharmony_ci int32_t offR = rec->offset; 3028bf215546Sopenharmony_ci int32_t offE = ldE->getSrc(0)->reg.data.offset; 3029bf215546Sopenharmony_ci int dR, dE; 3030bf215546Sopenharmony_ci 3031bf215546Sopenharmony_ci assert(offR <= offE); 3032bf215546Sopenharmony_ci for (dR = 0; offR < offE && ldR->defExists(dR); ++dR) 3033bf215546Sopenharmony_ci offR += ldR->getDef(dR)->reg.size; 3034bf215546Sopenharmony_ci if (offR != offE) 3035bf215546Sopenharmony_ci return false; 3036bf215546Sopenharmony_ci 3037bf215546Sopenharmony_ci for (dE = 0; ldE->defExists(dE) && ldR->defExists(dR); ++dE, ++dR) { 3038bf215546Sopenharmony_ci if (ldE->getDef(dE)->reg.size != ldR->getDef(dR)->reg.size) 3039bf215546Sopenharmony_ci return false; 3040bf215546Sopenharmony_ci ldE->def(dE).replace(ldR->getDef(dR), false); 3041bf215546Sopenharmony_ci } 3042bf215546Sopenharmony_ci 3043bf215546Sopenharmony_ci delete_Instruction(prog, ldE); 3044bf215546Sopenharmony_ci return true; 3045bf215546Sopenharmony_ci} 3046bf215546Sopenharmony_ci 3047bf215546Sopenharmony_cibool 3048bf215546Sopenharmony_ciMemoryOpt::replaceStFromSt(Instruction *restrict st, Record *rec) 3049bf215546Sopenharmony_ci{ 3050bf215546Sopenharmony_ci const Instruction *const ri = rec->insn; 3051bf215546Sopenharmony_ci Value *extra[3]; 3052bf215546Sopenharmony_ci 3053bf215546Sopenharmony_ci int32_t offS = st->getSrc(0)->reg.data.offset; 3054bf215546Sopenharmony_ci int32_t offR = rec->offset; 3055bf215546Sopenharmony_ci int32_t endS = offS + typeSizeof(st->dType); 3056bf215546Sopenharmony_ci int32_t endR = offR + typeSizeof(ri->dType); 3057bf215546Sopenharmony_ci 3058bf215546Sopenharmony_ci rec->size = MAX2(endS, endR) - MIN2(offS, offR); 3059bf215546Sopenharmony_ci 3060bf215546Sopenharmony_ci st->takeExtraSources(0, extra); 3061bf215546Sopenharmony_ci 3062bf215546Sopenharmony_ci if (offR < offS) { 3063bf215546Sopenharmony_ci Value *vals[10]; 3064bf215546Sopenharmony_ci int s, n; 3065bf215546Sopenharmony_ci int k = 0; 3066bf215546Sopenharmony_ci // get non-replaced sources of ri 3067bf215546Sopenharmony_ci for (s = 1; offR < offS; offR += ri->getSrc(s)->reg.size, ++s) 3068bf215546Sopenharmony_ci vals[k++] = ri->getSrc(s); 3069bf215546Sopenharmony_ci n = s; 3070bf215546Sopenharmony_ci // get replaced sources of st 3071bf215546Sopenharmony_ci for (s = 1; st->srcExists(s); offS += st->getSrc(s)->reg.size, ++s) 3072bf215546Sopenharmony_ci vals[k++] = st->getSrc(s); 3073bf215546Sopenharmony_ci // skip replaced sources of ri 3074bf215546Sopenharmony_ci for (s = n; offR < endS; offR += ri->getSrc(s)->reg.size, ++s); 3075bf215546Sopenharmony_ci // get non-replaced sources after values covered by st 3076bf215546Sopenharmony_ci for (; offR < endR; offR += ri->getSrc(s)->reg.size, ++s) 3077bf215546Sopenharmony_ci vals[k++] = ri->getSrc(s); 3078bf215546Sopenharmony_ci assert((unsigned int)k <= ARRAY_SIZE(vals)); 3079bf215546Sopenharmony_ci for (s = 0; s < k; ++s) 3080bf215546Sopenharmony_ci st->setSrc(s + 1, vals[s]); 3081bf215546Sopenharmony_ci st->setSrc(0, ri->getSrc(0)); 3082bf215546Sopenharmony_ci } else 3083bf215546Sopenharmony_ci if (endR > endS) { 3084bf215546Sopenharmony_ci int j, s; 3085bf215546Sopenharmony_ci for (j = 1; offR < endS; offR += ri->getSrc(j++)->reg.size); 3086bf215546Sopenharmony_ci for (s = 1; offS < endS; offS += st->getSrc(s++)->reg.size); 3087bf215546Sopenharmony_ci for (; offR < endR; offR += ri->getSrc(j++)->reg.size) 3088bf215546Sopenharmony_ci st->setSrc(s++, ri->getSrc(j)); 3089bf215546Sopenharmony_ci } 3090bf215546Sopenharmony_ci st->putExtraSources(0, extra); 3091bf215546Sopenharmony_ci 3092bf215546Sopenharmony_ci delete_Instruction(prog, rec->insn); 3093bf215546Sopenharmony_ci 3094bf215546Sopenharmony_ci rec->insn = st; 3095bf215546Sopenharmony_ci rec->offset = st->getSrc(0)->reg.data.offset; 3096bf215546Sopenharmony_ci 3097bf215546Sopenharmony_ci st->setType(typeOfSize(rec->size)); 3098bf215546Sopenharmony_ci 3099bf215546Sopenharmony_ci return true; 3100bf215546Sopenharmony_ci} 3101bf215546Sopenharmony_ci 3102bf215546Sopenharmony_cibool 3103bf215546Sopenharmony_ciMemoryOpt::Record::overlaps(const Instruction *ldst) const 3104bf215546Sopenharmony_ci{ 3105bf215546Sopenharmony_ci Record that; 3106bf215546Sopenharmony_ci that.set(ldst); 3107bf215546Sopenharmony_ci 3108bf215546Sopenharmony_ci // This assumes that images/buffers can't overlap. They can. 3109bf215546Sopenharmony_ci // TODO: Plumb the restrict logic through, and only skip when it's a 3110bf215546Sopenharmony_ci // restrict situation, or there can implicitly be no writes. 3111bf215546Sopenharmony_ci if (this->fileIndex != that.fileIndex && this->rel[1] == that.rel[1]) 3112bf215546Sopenharmony_ci return false; 3113bf215546Sopenharmony_ci 3114bf215546Sopenharmony_ci if (this->rel[0] || that.rel[0]) 3115bf215546Sopenharmony_ci return this->base == that.base; 3116bf215546Sopenharmony_ci 3117bf215546Sopenharmony_ci return 3118bf215546Sopenharmony_ci (this->offset < that.offset + that.size) && 3119bf215546Sopenharmony_ci (this->offset + this->size > that.offset); 3120bf215546Sopenharmony_ci} 3121bf215546Sopenharmony_ci 3122bf215546Sopenharmony_ci// We must not eliminate stores that affect the result of @ld if 3123bf215546Sopenharmony_ci// we find later stores to the same location, and we may no longer 3124bf215546Sopenharmony_ci// merge them with later stores. 3125bf215546Sopenharmony_ci// The stored value can, however, still be used to determine the value 3126bf215546Sopenharmony_ci// returned by future loads. 3127bf215546Sopenharmony_civoid 3128bf215546Sopenharmony_ciMemoryOpt::lockStores(Instruction *const ld) 3129bf215546Sopenharmony_ci{ 3130bf215546Sopenharmony_ci for (Record *r = stores[ld->src(0).getFile()]; r; r = r->next) 3131bf215546Sopenharmony_ci if (!r->locked && r->overlaps(ld)) 3132bf215546Sopenharmony_ci r->locked = true; 3133bf215546Sopenharmony_ci} 3134bf215546Sopenharmony_ci 3135bf215546Sopenharmony_ci// Prior loads from the location of @st are no longer valid. 3136bf215546Sopenharmony_ci// Stores to the location of @st may no longer be used to derive 3137bf215546Sopenharmony_ci// the value at it nor be coalesced into later stores. 3138bf215546Sopenharmony_civoid 3139bf215546Sopenharmony_ciMemoryOpt::purgeRecords(Instruction *const st, DataFile f) 3140bf215546Sopenharmony_ci{ 3141bf215546Sopenharmony_ci if (st) 3142bf215546Sopenharmony_ci f = st->src(0).getFile(); 3143bf215546Sopenharmony_ci 3144bf215546Sopenharmony_ci for (Record *r = loads[f]; r; r = r->next) 3145bf215546Sopenharmony_ci if (!st || r->overlaps(st)) 3146bf215546Sopenharmony_ci r->unlink(&loads[f]); 3147bf215546Sopenharmony_ci 3148bf215546Sopenharmony_ci for (Record *r = stores[f]; r; r = r->next) 3149bf215546Sopenharmony_ci if (!st || r->overlaps(st)) 3150bf215546Sopenharmony_ci r->unlink(&stores[f]); 3151bf215546Sopenharmony_ci} 3152bf215546Sopenharmony_ci 3153bf215546Sopenharmony_cibool 3154bf215546Sopenharmony_ciMemoryOpt::visit(BasicBlock *bb) 3155bf215546Sopenharmony_ci{ 3156bf215546Sopenharmony_ci bool ret = runOpt(bb); 3157bf215546Sopenharmony_ci // Run again, one pass won't combine 4 32 bit ld/st to a single 128 bit ld/st 3158bf215546Sopenharmony_ci // where 96 bit memory operations are forbidden. 3159bf215546Sopenharmony_ci if (ret) 3160bf215546Sopenharmony_ci ret = runOpt(bb); 3161bf215546Sopenharmony_ci return ret; 3162bf215546Sopenharmony_ci} 3163bf215546Sopenharmony_ci 3164bf215546Sopenharmony_cibool 3165bf215546Sopenharmony_ciMemoryOpt::runOpt(BasicBlock *bb) 3166bf215546Sopenharmony_ci{ 3167bf215546Sopenharmony_ci Instruction *ldst, *next; 3168bf215546Sopenharmony_ci Record *rec; 3169bf215546Sopenharmony_ci bool isAdjacent = true; 3170bf215546Sopenharmony_ci 3171bf215546Sopenharmony_ci for (ldst = bb->getEntry(); ldst; ldst = next) { 3172bf215546Sopenharmony_ci bool keep = true; 3173bf215546Sopenharmony_ci bool isLoad = true; 3174bf215546Sopenharmony_ci next = ldst->next; 3175bf215546Sopenharmony_ci 3176bf215546Sopenharmony_ci if (ldst->op == OP_LOAD || ldst->op == OP_VFETCH) { 3177bf215546Sopenharmony_ci if (ldst->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { 3178bf215546Sopenharmony_ci purgeRecords(ldst, ldst->src(0).getFile()); 3179bf215546Sopenharmony_ci continue; 3180bf215546Sopenharmony_ci } 3181bf215546Sopenharmony_ci if (ldst->isDead()) { 3182bf215546Sopenharmony_ci // might have been produced by earlier optimization 3183bf215546Sopenharmony_ci delete_Instruction(prog, ldst); 3184bf215546Sopenharmony_ci continue; 3185bf215546Sopenharmony_ci } 3186bf215546Sopenharmony_ci } else 3187bf215546Sopenharmony_ci if (ldst->op == OP_STORE || ldst->op == OP_EXPORT) { 3188bf215546Sopenharmony_ci if (ldst->subOp == NV50_IR_SUBOP_STORE_UNLOCKED) { 3189bf215546Sopenharmony_ci purgeRecords(ldst, ldst->src(0).getFile()); 3190bf215546Sopenharmony_ci continue; 3191bf215546Sopenharmony_ci } 3192bf215546Sopenharmony_ci if (typeSizeof(ldst->dType) == 4 && 3193bf215546Sopenharmony_ci ldst->src(1).getFile() == FILE_GPR && 3194bf215546Sopenharmony_ci ldst->getSrc(1)->getInsn()->op == OP_NOP) { 3195bf215546Sopenharmony_ci delete_Instruction(prog, ldst); 3196bf215546Sopenharmony_ci continue; 3197bf215546Sopenharmony_ci } 3198bf215546Sopenharmony_ci isLoad = false; 3199bf215546Sopenharmony_ci } else { 3200bf215546Sopenharmony_ci // TODO: maybe have all fixed ops act as barrier ? 3201bf215546Sopenharmony_ci if (ldst->op == OP_CALL || 3202bf215546Sopenharmony_ci ldst->op == OP_BAR || 3203bf215546Sopenharmony_ci ldst->op == OP_MEMBAR) { 3204bf215546Sopenharmony_ci purgeRecords(NULL, FILE_MEMORY_LOCAL); 3205bf215546Sopenharmony_ci purgeRecords(NULL, FILE_MEMORY_GLOBAL); 3206bf215546Sopenharmony_ci purgeRecords(NULL, FILE_MEMORY_SHARED); 3207bf215546Sopenharmony_ci purgeRecords(NULL, FILE_SHADER_OUTPUT); 3208bf215546Sopenharmony_ci } else 3209bf215546Sopenharmony_ci if (ldst->op == OP_ATOM || ldst->op == OP_CCTL) { 3210bf215546Sopenharmony_ci if (ldst->src(0).getFile() == FILE_MEMORY_GLOBAL) { 3211bf215546Sopenharmony_ci purgeRecords(NULL, FILE_MEMORY_LOCAL); 3212bf215546Sopenharmony_ci purgeRecords(NULL, FILE_MEMORY_GLOBAL); 3213bf215546Sopenharmony_ci purgeRecords(NULL, FILE_MEMORY_SHARED); 3214bf215546Sopenharmony_ci } else { 3215bf215546Sopenharmony_ci purgeRecords(NULL, ldst->src(0).getFile()); 3216bf215546Sopenharmony_ci } 3217bf215546Sopenharmony_ci } else 3218bf215546Sopenharmony_ci if (ldst->op == OP_EMIT || ldst->op == OP_RESTART) { 3219bf215546Sopenharmony_ci purgeRecords(NULL, FILE_SHADER_OUTPUT); 3220bf215546Sopenharmony_ci } 3221bf215546Sopenharmony_ci continue; 3222bf215546Sopenharmony_ci } 3223bf215546Sopenharmony_ci if (ldst->getPredicate()) // TODO: handle predicated ld/st 3224bf215546Sopenharmony_ci continue; 3225bf215546Sopenharmony_ci if (ldst->perPatch) // TODO: create separate per-patch lists 3226bf215546Sopenharmony_ci continue; 3227bf215546Sopenharmony_ci 3228bf215546Sopenharmony_ci if (isLoad) { 3229bf215546Sopenharmony_ci DataFile file = ldst->src(0).getFile(); 3230bf215546Sopenharmony_ci 3231bf215546Sopenharmony_ci // if ld l[]/g[] look for previous store to eliminate the reload 3232bf215546Sopenharmony_ci if (file == FILE_MEMORY_GLOBAL || file == FILE_MEMORY_LOCAL) { 3233bf215546Sopenharmony_ci // TODO: shared memory ? 3234bf215546Sopenharmony_ci rec = findRecord(ldst, false, isAdjacent); 3235bf215546Sopenharmony_ci if (rec && !isAdjacent) 3236bf215546Sopenharmony_ci keep = !replaceLdFromSt(ldst, rec); 3237bf215546Sopenharmony_ci } 3238bf215546Sopenharmony_ci 3239bf215546Sopenharmony_ci // or look for ld from the same location and replace this one 3240bf215546Sopenharmony_ci rec = keep ? findRecord(ldst, true, isAdjacent) : NULL; 3241bf215546Sopenharmony_ci if (rec) { 3242bf215546Sopenharmony_ci if (!isAdjacent) 3243bf215546Sopenharmony_ci keep = !replaceLdFromLd(ldst, rec); 3244bf215546Sopenharmony_ci else 3245bf215546Sopenharmony_ci // or combine a previous load with this one 3246bf215546Sopenharmony_ci keep = !combineLd(rec, ldst); 3247bf215546Sopenharmony_ci } 3248bf215546Sopenharmony_ci if (keep) 3249bf215546Sopenharmony_ci lockStores(ldst); 3250bf215546Sopenharmony_ci } else { 3251bf215546Sopenharmony_ci rec = findRecord(ldst, false, isAdjacent); 3252bf215546Sopenharmony_ci if (rec) { 3253bf215546Sopenharmony_ci if (!isAdjacent) 3254bf215546Sopenharmony_ci keep = !replaceStFromSt(ldst, rec); 3255bf215546Sopenharmony_ci else 3256bf215546Sopenharmony_ci keep = !combineSt(rec, ldst); 3257bf215546Sopenharmony_ci } 3258bf215546Sopenharmony_ci if (keep) 3259bf215546Sopenharmony_ci purgeRecords(ldst, DATA_FILE_COUNT); 3260bf215546Sopenharmony_ci } 3261bf215546Sopenharmony_ci if (keep) 3262bf215546Sopenharmony_ci addRecord(ldst); 3263bf215546Sopenharmony_ci } 3264bf215546Sopenharmony_ci reset(); 3265bf215546Sopenharmony_ci 3266bf215546Sopenharmony_ci return true; 3267bf215546Sopenharmony_ci} 3268bf215546Sopenharmony_ci 3269bf215546Sopenharmony_ci// ============================================================================= 3270bf215546Sopenharmony_ci 3271bf215546Sopenharmony_ci// Turn control flow into predicated instructions (after register allocation !). 3272bf215546Sopenharmony_ci// TODO: 3273bf215546Sopenharmony_ci// Could move this to before register allocation on NVC0 and also handle nested 3274bf215546Sopenharmony_ci// constructs. 3275bf215546Sopenharmony_ciclass FlatteningPass : public Pass 3276bf215546Sopenharmony_ci{ 3277bf215546Sopenharmony_cipublic: 3278bf215546Sopenharmony_ci FlatteningPass() : gpr_unit(0) {} 3279bf215546Sopenharmony_ci 3280bf215546Sopenharmony_ciprivate: 3281bf215546Sopenharmony_ci virtual bool visit(Function *); 3282bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 3283bf215546Sopenharmony_ci 3284bf215546Sopenharmony_ci bool tryPredicateConditional(BasicBlock *); 3285bf215546Sopenharmony_ci void predicateInstructions(BasicBlock *, Value *pred, CondCode cc); 3286bf215546Sopenharmony_ci void tryPropagateBranch(BasicBlock *); 3287bf215546Sopenharmony_ci inline bool isConstantCondition(Value *pred); 3288bf215546Sopenharmony_ci inline bool mayPredicate(const Instruction *, const Value *pred) const; 3289bf215546Sopenharmony_ci inline void removeFlow(Instruction *); 3290bf215546Sopenharmony_ci 3291bf215546Sopenharmony_ci uint8_t gpr_unit; 3292bf215546Sopenharmony_ci}; 3293bf215546Sopenharmony_ci 3294bf215546Sopenharmony_cibool 3295bf215546Sopenharmony_ciFlatteningPass::isConstantCondition(Value *pred) 3296bf215546Sopenharmony_ci{ 3297bf215546Sopenharmony_ci Instruction *insn = pred->getUniqueInsn(); 3298bf215546Sopenharmony_ci assert(insn); 3299bf215546Sopenharmony_ci if (insn->op != OP_SET || insn->srcExists(2)) 3300bf215546Sopenharmony_ci return false; 3301bf215546Sopenharmony_ci 3302bf215546Sopenharmony_ci for (int s = 0; s < 2 && insn->srcExists(s); ++s) { 3303bf215546Sopenharmony_ci Instruction *ld = insn->getSrc(s)->getUniqueInsn(); 3304bf215546Sopenharmony_ci DataFile file; 3305bf215546Sopenharmony_ci if (ld) { 3306bf215546Sopenharmony_ci if (ld->op != OP_MOV && ld->op != OP_LOAD) 3307bf215546Sopenharmony_ci return false; 3308bf215546Sopenharmony_ci if (ld->src(0).isIndirect(0)) 3309bf215546Sopenharmony_ci return false; 3310bf215546Sopenharmony_ci file = ld->src(0).getFile(); 3311bf215546Sopenharmony_ci } else { 3312bf215546Sopenharmony_ci file = insn->src(s).getFile(); 3313bf215546Sopenharmony_ci // catch $r63 on NVC0 and $r63/$r127 on NV50. Unfortunately maxGPR is 3314bf215546Sopenharmony_ci // in register "units", which can vary between targets. 3315bf215546Sopenharmony_ci if (file == FILE_GPR) { 3316bf215546Sopenharmony_ci Value *v = insn->getSrc(s); 3317bf215546Sopenharmony_ci int bytes = v->reg.data.id * MIN2(v->reg.size, 4); 3318bf215546Sopenharmony_ci int units = bytes >> gpr_unit; 3319bf215546Sopenharmony_ci if (units > prog->maxGPR) 3320bf215546Sopenharmony_ci file = FILE_IMMEDIATE; 3321bf215546Sopenharmony_ci } 3322bf215546Sopenharmony_ci } 3323bf215546Sopenharmony_ci if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST) 3324bf215546Sopenharmony_ci return false; 3325bf215546Sopenharmony_ci } 3326bf215546Sopenharmony_ci return true; 3327bf215546Sopenharmony_ci} 3328bf215546Sopenharmony_ci 3329bf215546Sopenharmony_civoid 3330bf215546Sopenharmony_ciFlatteningPass::removeFlow(Instruction *insn) 3331bf215546Sopenharmony_ci{ 3332bf215546Sopenharmony_ci FlowInstruction *term = insn ? insn->asFlow() : NULL; 3333bf215546Sopenharmony_ci if (!term) 3334bf215546Sopenharmony_ci return; 3335bf215546Sopenharmony_ci Graph::Edge::Type ty = term->bb->cfg.outgoing().getType(); 3336bf215546Sopenharmony_ci 3337bf215546Sopenharmony_ci if (term->op == OP_BRA) { 3338bf215546Sopenharmony_ci // TODO: this might get more difficult when we get arbitrary BRAs 3339bf215546Sopenharmony_ci if (ty == Graph::Edge::CROSS || ty == Graph::Edge::BACK) 3340bf215546Sopenharmony_ci return; 3341bf215546Sopenharmony_ci } else 3342bf215546Sopenharmony_ci if (term->op != OP_JOIN) 3343bf215546Sopenharmony_ci return; 3344bf215546Sopenharmony_ci 3345bf215546Sopenharmony_ci Value *pred = term->getPredicate(); 3346bf215546Sopenharmony_ci 3347bf215546Sopenharmony_ci delete_Instruction(prog, term); 3348bf215546Sopenharmony_ci 3349bf215546Sopenharmony_ci if (pred && pred->refCount() == 0) { 3350bf215546Sopenharmony_ci Instruction *pSet = pred->getUniqueInsn(); 3351bf215546Sopenharmony_ci pred->join->reg.data.id = -1; // deallocate 3352bf215546Sopenharmony_ci if (pSet->isDead()) 3353bf215546Sopenharmony_ci delete_Instruction(prog, pSet); 3354bf215546Sopenharmony_ci } 3355bf215546Sopenharmony_ci} 3356bf215546Sopenharmony_ci 3357bf215546Sopenharmony_civoid 3358bf215546Sopenharmony_ciFlatteningPass::predicateInstructions(BasicBlock *bb, Value *pred, CondCode cc) 3359bf215546Sopenharmony_ci{ 3360bf215546Sopenharmony_ci for (Instruction *i = bb->getEntry(); i; i = i->next) { 3361bf215546Sopenharmony_ci if (i->isNop()) 3362bf215546Sopenharmony_ci continue; 3363bf215546Sopenharmony_ci assert(!i->getPredicate()); 3364bf215546Sopenharmony_ci i->setPredicate(cc, pred); 3365bf215546Sopenharmony_ci } 3366bf215546Sopenharmony_ci removeFlow(bb->getExit()); 3367bf215546Sopenharmony_ci} 3368bf215546Sopenharmony_ci 3369bf215546Sopenharmony_cibool 3370bf215546Sopenharmony_ciFlatteningPass::mayPredicate(const Instruction *insn, const Value *pred) const 3371bf215546Sopenharmony_ci{ 3372bf215546Sopenharmony_ci if (insn->isPseudo()) 3373bf215546Sopenharmony_ci return true; 3374bf215546Sopenharmony_ci // TODO: calls where we don't know which registers are modified 3375bf215546Sopenharmony_ci 3376bf215546Sopenharmony_ci if (!prog->getTarget()->mayPredicate(insn, pred)) 3377bf215546Sopenharmony_ci return false; 3378bf215546Sopenharmony_ci for (int d = 0; insn->defExists(d); ++d) 3379bf215546Sopenharmony_ci if (insn->getDef(d)->equals(pred)) 3380bf215546Sopenharmony_ci return false; 3381bf215546Sopenharmony_ci return true; 3382bf215546Sopenharmony_ci} 3383bf215546Sopenharmony_ci 3384bf215546Sopenharmony_ci// If we jump to BRA/RET/EXIT, replace the jump with it. 3385bf215546Sopenharmony_ci// NOTE: We do not update the CFG anymore here ! 3386bf215546Sopenharmony_ci// 3387bf215546Sopenharmony_ci// TODO: Handle cases where we skip over a branch (maybe do that elsewhere ?): 3388bf215546Sopenharmony_ci// BB:0 3389bf215546Sopenharmony_ci// @p0 bra BB:2 -> @!p0 bra BB:3 iff (!) BB:2 immediately adjoins BB:1 3390bf215546Sopenharmony_ci// BB1: 3391bf215546Sopenharmony_ci// bra BB:3 3392bf215546Sopenharmony_ci// BB2: 3393bf215546Sopenharmony_ci// ... 3394bf215546Sopenharmony_ci// BB3: 3395bf215546Sopenharmony_ci// ... 3396bf215546Sopenharmony_civoid 3397bf215546Sopenharmony_ciFlatteningPass::tryPropagateBranch(BasicBlock *bb) 3398bf215546Sopenharmony_ci{ 3399bf215546Sopenharmony_ci for (Instruction *i = bb->getExit(); i && i->op == OP_BRA; i = i->prev) { 3400bf215546Sopenharmony_ci BasicBlock *bf = i->asFlow()->target.bb; 3401bf215546Sopenharmony_ci 3402bf215546Sopenharmony_ci if (bf->getInsnCount() != 1) 3403bf215546Sopenharmony_ci continue; 3404bf215546Sopenharmony_ci 3405bf215546Sopenharmony_ci FlowInstruction *bra = i->asFlow(); 3406bf215546Sopenharmony_ci FlowInstruction *rep = bf->getExit()->asFlow(); 3407bf215546Sopenharmony_ci 3408bf215546Sopenharmony_ci if (!rep || rep->getPredicate()) 3409bf215546Sopenharmony_ci continue; 3410bf215546Sopenharmony_ci if (rep->op != OP_BRA && 3411bf215546Sopenharmony_ci rep->op != OP_JOIN && 3412bf215546Sopenharmony_ci rep->op != OP_EXIT) 3413bf215546Sopenharmony_ci continue; 3414bf215546Sopenharmony_ci 3415bf215546Sopenharmony_ci // TODO: If there are multiple branches to @rep, only the first would 3416bf215546Sopenharmony_ci // be replaced, so only remove them after this pass is done ? 3417bf215546Sopenharmony_ci // Also, need to check all incident blocks for fall-through exits and 3418bf215546Sopenharmony_ci // add the branch there. 3419bf215546Sopenharmony_ci bra->op = rep->op; 3420bf215546Sopenharmony_ci bra->target.bb = rep->target.bb; 3421bf215546Sopenharmony_ci if (bf->cfg.incidentCount() == 1) 3422bf215546Sopenharmony_ci bf->remove(rep); 3423bf215546Sopenharmony_ci } 3424bf215546Sopenharmony_ci} 3425bf215546Sopenharmony_ci 3426bf215546Sopenharmony_cibool 3427bf215546Sopenharmony_ciFlatteningPass::visit(Function *fn) 3428bf215546Sopenharmony_ci{ 3429bf215546Sopenharmony_ci gpr_unit = prog->getTarget()->getFileUnit(FILE_GPR); 3430bf215546Sopenharmony_ci 3431bf215546Sopenharmony_ci return true; 3432bf215546Sopenharmony_ci} 3433bf215546Sopenharmony_ci 3434bf215546Sopenharmony_cibool 3435bf215546Sopenharmony_ciFlatteningPass::visit(BasicBlock *bb) 3436bf215546Sopenharmony_ci{ 3437bf215546Sopenharmony_ci if (tryPredicateConditional(bb)) 3438bf215546Sopenharmony_ci return true; 3439bf215546Sopenharmony_ci 3440bf215546Sopenharmony_ci // try to attach join to previous instruction 3441bf215546Sopenharmony_ci if (prog->getTarget()->hasJoin) { 3442bf215546Sopenharmony_ci Instruction *insn = bb->getExit(); 3443bf215546Sopenharmony_ci if (insn && insn->op == OP_JOIN && !insn->getPredicate()) { 3444bf215546Sopenharmony_ci insn = insn->prev; 3445bf215546Sopenharmony_ci if (insn && !insn->getPredicate() && 3446bf215546Sopenharmony_ci !insn->asFlow() && 3447bf215546Sopenharmony_ci insn->op != OP_DISCARD && 3448bf215546Sopenharmony_ci insn->op != OP_TEXBAR && 3449bf215546Sopenharmony_ci !isTextureOp(insn->op) && // probably just nve4 3450bf215546Sopenharmony_ci !isSurfaceOp(insn->op) && // not confirmed 3451bf215546Sopenharmony_ci insn->op != OP_LINTERP && // probably just nve4 3452bf215546Sopenharmony_ci insn->op != OP_PINTERP && // probably just nve4 3453bf215546Sopenharmony_ci ((insn->op != OP_LOAD && insn->op != OP_STORE && insn->op != OP_ATOM) || 3454bf215546Sopenharmony_ci (typeSizeof(insn->dType) <= 4 && !insn->src(0).isIndirect(0))) && 3455bf215546Sopenharmony_ci !insn->isNop()) { 3456bf215546Sopenharmony_ci insn->join = 1; 3457bf215546Sopenharmony_ci bb->remove(bb->getExit()); 3458bf215546Sopenharmony_ci return true; 3459bf215546Sopenharmony_ci } 3460bf215546Sopenharmony_ci } 3461bf215546Sopenharmony_ci } 3462bf215546Sopenharmony_ci 3463bf215546Sopenharmony_ci tryPropagateBranch(bb); 3464bf215546Sopenharmony_ci 3465bf215546Sopenharmony_ci return true; 3466bf215546Sopenharmony_ci} 3467bf215546Sopenharmony_ci 3468bf215546Sopenharmony_cibool 3469bf215546Sopenharmony_ciFlatteningPass::tryPredicateConditional(BasicBlock *bb) 3470bf215546Sopenharmony_ci{ 3471bf215546Sopenharmony_ci BasicBlock *bL = NULL, *bR = NULL; 3472bf215546Sopenharmony_ci unsigned int nL = 0, nR = 0, limit = 12; 3473bf215546Sopenharmony_ci Instruction *insn; 3474bf215546Sopenharmony_ci unsigned int mask; 3475bf215546Sopenharmony_ci 3476bf215546Sopenharmony_ci mask = bb->initiatesSimpleConditional(); 3477bf215546Sopenharmony_ci if (!mask) 3478bf215546Sopenharmony_ci return false; 3479bf215546Sopenharmony_ci 3480bf215546Sopenharmony_ci assert(bb->getExit()); 3481bf215546Sopenharmony_ci Value *pred = bb->getExit()->getPredicate(); 3482bf215546Sopenharmony_ci assert(pred); 3483bf215546Sopenharmony_ci 3484bf215546Sopenharmony_ci if (isConstantCondition(pred)) 3485bf215546Sopenharmony_ci limit = 4; 3486bf215546Sopenharmony_ci 3487bf215546Sopenharmony_ci Graph::EdgeIterator ei = bb->cfg.outgoing(); 3488bf215546Sopenharmony_ci 3489bf215546Sopenharmony_ci if (mask & 1) { 3490bf215546Sopenharmony_ci bL = BasicBlock::get(ei.getNode()); 3491bf215546Sopenharmony_ci for (insn = bL->getEntry(); insn; insn = insn->next, ++nL) 3492bf215546Sopenharmony_ci if (!mayPredicate(insn, pred)) 3493bf215546Sopenharmony_ci return false; 3494bf215546Sopenharmony_ci if (nL > limit) 3495bf215546Sopenharmony_ci return false; // too long, do a real branch 3496bf215546Sopenharmony_ci } 3497bf215546Sopenharmony_ci ei.next(); 3498bf215546Sopenharmony_ci 3499bf215546Sopenharmony_ci if (mask & 2) { 3500bf215546Sopenharmony_ci bR = BasicBlock::get(ei.getNode()); 3501bf215546Sopenharmony_ci for (insn = bR->getEntry(); insn; insn = insn->next, ++nR) 3502bf215546Sopenharmony_ci if (!mayPredicate(insn, pred)) 3503bf215546Sopenharmony_ci return false; 3504bf215546Sopenharmony_ci if (nR > limit) 3505bf215546Sopenharmony_ci return false; // too long, do a real branch 3506bf215546Sopenharmony_ci } 3507bf215546Sopenharmony_ci 3508bf215546Sopenharmony_ci if (bL) 3509bf215546Sopenharmony_ci predicateInstructions(bL, pred, bb->getExit()->cc); 3510bf215546Sopenharmony_ci if (bR) 3511bf215546Sopenharmony_ci predicateInstructions(bR, pred, inverseCondCode(bb->getExit()->cc)); 3512bf215546Sopenharmony_ci 3513bf215546Sopenharmony_ci if (bb->joinAt) { 3514bf215546Sopenharmony_ci bb->remove(bb->joinAt); 3515bf215546Sopenharmony_ci bb->joinAt = NULL; 3516bf215546Sopenharmony_ci } 3517bf215546Sopenharmony_ci removeFlow(bb->getExit()); // delete the branch/join at the fork point 3518bf215546Sopenharmony_ci 3519bf215546Sopenharmony_ci // remove potential join operations at the end of the conditional 3520bf215546Sopenharmony_ci if (prog->getTarget()->joinAnterior) { 3521bf215546Sopenharmony_ci bb = BasicBlock::get((bL ? bL : bR)->cfg.outgoing().getNode()); 3522bf215546Sopenharmony_ci if (bb->getEntry() && bb->getEntry()->op == OP_JOIN) 3523bf215546Sopenharmony_ci removeFlow(bb->getEntry()); 3524bf215546Sopenharmony_ci } 3525bf215546Sopenharmony_ci 3526bf215546Sopenharmony_ci return true; 3527bf215546Sopenharmony_ci} 3528bf215546Sopenharmony_ci 3529bf215546Sopenharmony_ci// ============================================================================= 3530bf215546Sopenharmony_ci 3531bf215546Sopenharmony_ci// Fold Immediate into MAD; must be done after register allocation due to 3532bf215546Sopenharmony_ci// constraint SDST == SSRC2 3533bf215546Sopenharmony_ci// TODO: 3534bf215546Sopenharmony_ci// Does NVC0+ have other situations where this pass makes sense? 3535bf215546Sopenharmony_ciclass PostRaLoadPropagation : public Pass 3536bf215546Sopenharmony_ci{ 3537bf215546Sopenharmony_ciprivate: 3538bf215546Sopenharmony_ci virtual bool visit(Instruction *); 3539bf215546Sopenharmony_ci 3540bf215546Sopenharmony_ci void handleMADforNV50(Instruction *); 3541bf215546Sopenharmony_ci void handleMADforNVC0(Instruction *); 3542bf215546Sopenharmony_ci}; 3543bf215546Sopenharmony_ci 3544bf215546Sopenharmony_cistatic bool 3545bf215546Sopenharmony_cipost_ra_dead(Instruction *i) 3546bf215546Sopenharmony_ci{ 3547bf215546Sopenharmony_ci for (int d = 0; i->defExists(d); ++d) 3548bf215546Sopenharmony_ci if (i->getDef(d)->refCount()) 3549bf215546Sopenharmony_ci return false; 3550bf215546Sopenharmony_ci return true; 3551bf215546Sopenharmony_ci} 3552bf215546Sopenharmony_ci 3553bf215546Sopenharmony_ci// Fold Immediate into MAD; must be done after register allocation due to 3554bf215546Sopenharmony_ci// constraint SDST == SSRC2 3555bf215546Sopenharmony_civoid 3556bf215546Sopenharmony_ciPostRaLoadPropagation::handleMADforNV50(Instruction *i) 3557bf215546Sopenharmony_ci{ 3558bf215546Sopenharmony_ci if (i->def(0).getFile() != FILE_GPR || 3559bf215546Sopenharmony_ci i->src(0).getFile() != FILE_GPR || 3560bf215546Sopenharmony_ci i->src(1).getFile() != FILE_GPR || 3561bf215546Sopenharmony_ci i->src(2).getFile() != FILE_GPR || 3562bf215546Sopenharmony_ci i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id) 3563bf215546Sopenharmony_ci return; 3564bf215546Sopenharmony_ci 3565bf215546Sopenharmony_ci if (i->getDef(0)->reg.data.id >= 64 || 3566bf215546Sopenharmony_ci i->getSrc(0)->reg.data.id >= 64) 3567bf215546Sopenharmony_ci return; 3568bf215546Sopenharmony_ci 3569bf215546Sopenharmony_ci if (i->flagsSrc >= 0 && i->getSrc(i->flagsSrc)->reg.data.id != 0) 3570bf215546Sopenharmony_ci return; 3571bf215546Sopenharmony_ci 3572bf215546Sopenharmony_ci if (i->getPredicate()) 3573bf215546Sopenharmony_ci return; 3574bf215546Sopenharmony_ci 3575bf215546Sopenharmony_ci Value *vtmp; 3576bf215546Sopenharmony_ci Instruction *def = i->getSrc(1)->getInsn(); 3577bf215546Sopenharmony_ci 3578bf215546Sopenharmony_ci if (def && def->op == OP_SPLIT && typeSizeof(def->sType) == 4) 3579bf215546Sopenharmony_ci def = def->getSrc(0)->getInsn(); 3580bf215546Sopenharmony_ci if (def && def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) { 3581bf215546Sopenharmony_ci vtmp = i->getSrc(1); 3582bf215546Sopenharmony_ci if (isFloatType(i->sType)) { 3583bf215546Sopenharmony_ci i->setSrc(1, def->getSrc(0)); 3584bf215546Sopenharmony_ci } else { 3585bf215546Sopenharmony_ci ImmediateValue val; 3586bf215546Sopenharmony_ci // getImmediate() has side-effects on the argument so this *shouldn't* 3587bf215546Sopenharmony_ci // be folded into the assert() 3588bf215546Sopenharmony_ci ASSERTED bool ret = def->src(0).getImmediate(val); 3589bf215546Sopenharmony_ci assert(ret); 3590bf215546Sopenharmony_ci if (i->getSrc(1)->reg.data.id & 1) 3591bf215546Sopenharmony_ci val.reg.data.u32 >>= 16; 3592bf215546Sopenharmony_ci val.reg.data.u32 &= 0xffff; 3593bf215546Sopenharmony_ci i->setSrc(1, new_ImmediateValue(prog, val.reg.data.u32)); 3594bf215546Sopenharmony_ci } 3595bf215546Sopenharmony_ci 3596bf215546Sopenharmony_ci /* There's no post-RA dead code elimination, so do it here 3597bf215546Sopenharmony_ci * XXX: if we add more code-removing post-RA passes, we might 3598bf215546Sopenharmony_ci * want to create a post-RA dead-code elim pass */ 3599bf215546Sopenharmony_ci if (post_ra_dead(vtmp->getInsn())) { 3600bf215546Sopenharmony_ci Value *src = vtmp->getInsn()->getSrc(0); 3601bf215546Sopenharmony_ci // Careful -- splits will have already been removed from the 3602bf215546Sopenharmony_ci // functions. Don't double-delete. 3603bf215546Sopenharmony_ci if (vtmp->getInsn()->bb) 3604bf215546Sopenharmony_ci delete_Instruction(prog, vtmp->getInsn()); 3605bf215546Sopenharmony_ci if (src->getInsn() && post_ra_dead(src->getInsn())) 3606bf215546Sopenharmony_ci delete_Instruction(prog, src->getInsn()); 3607bf215546Sopenharmony_ci } 3608bf215546Sopenharmony_ci } 3609bf215546Sopenharmony_ci} 3610bf215546Sopenharmony_ci 3611bf215546Sopenharmony_civoid 3612bf215546Sopenharmony_ciPostRaLoadPropagation::handleMADforNVC0(Instruction *i) 3613bf215546Sopenharmony_ci{ 3614bf215546Sopenharmony_ci if (i->def(0).getFile() != FILE_GPR || 3615bf215546Sopenharmony_ci i->src(0).getFile() != FILE_GPR || 3616bf215546Sopenharmony_ci i->src(1).getFile() != FILE_GPR || 3617bf215546Sopenharmony_ci i->src(2).getFile() != FILE_GPR || 3618bf215546Sopenharmony_ci i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id) 3619bf215546Sopenharmony_ci return; 3620bf215546Sopenharmony_ci 3621bf215546Sopenharmony_ci // TODO: gm107 can also do this for S32, maybe other chipsets as well 3622bf215546Sopenharmony_ci if (i->dType != TYPE_F32) 3623bf215546Sopenharmony_ci return; 3624bf215546Sopenharmony_ci 3625bf215546Sopenharmony_ci if ((i->src(2).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG)) 3626bf215546Sopenharmony_ci return; 3627bf215546Sopenharmony_ci 3628bf215546Sopenharmony_ci ImmediateValue val; 3629bf215546Sopenharmony_ci int s; 3630bf215546Sopenharmony_ci 3631bf215546Sopenharmony_ci if (i->src(0).getImmediate(val)) 3632bf215546Sopenharmony_ci s = 1; 3633bf215546Sopenharmony_ci else if (i->src(1).getImmediate(val)) 3634bf215546Sopenharmony_ci s = 0; 3635bf215546Sopenharmony_ci else 3636bf215546Sopenharmony_ci return; 3637bf215546Sopenharmony_ci 3638bf215546Sopenharmony_ci if ((i->src(s).mod | Modifier(NV50_IR_MOD_NEG)) != Modifier(NV50_IR_MOD_NEG)) 3639bf215546Sopenharmony_ci return; 3640bf215546Sopenharmony_ci 3641bf215546Sopenharmony_ci if (s == 1) 3642bf215546Sopenharmony_ci i->swapSources(0, 1); 3643bf215546Sopenharmony_ci 3644bf215546Sopenharmony_ci Instruction *imm = i->getSrc(1)->getInsn(); 3645bf215546Sopenharmony_ci i->setSrc(1, imm->getSrc(0)); 3646bf215546Sopenharmony_ci if (post_ra_dead(imm)) 3647bf215546Sopenharmony_ci delete_Instruction(prog, imm); 3648bf215546Sopenharmony_ci} 3649bf215546Sopenharmony_ci 3650bf215546Sopenharmony_cibool 3651bf215546Sopenharmony_ciPostRaLoadPropagation::visit(Instruction *i) 3652bf215546Sopenharmony_ci{ 3653bf215546Sopenharmony_ci switch (i->op) { 3654bf215546Sopenharmony_ci case OP_FMA: 3655bf215546Sopenharmony_ci case OP_MAD: 3656bf215546Sopenharmony_ci if (prog->getTarget()->getChipset() < 0xc0) 3657bf215546Sopenharmony_ci handleMADforNV50(i); 3658bf215546Sopenharmony_ci else 3659bf215546Sopenharmony_ci handleMADforNVC0(i); 3660bf215546Sopenharmony_ci break; 3661bf215546Sopenharmony_ci default: 3662bf215546Sopenharmony_ci break; 3663bf215546Sopenharmony_ci } 3664bf215546Sopenharmony_ci 3665bf215546Sopenharmony_ci return true; 3666bf215546Sopenharmony_ci} 3667bf215546Sopenharmony_ci 3668bf215546Sopenharmony_ci// ============================================================================= 3669bf215546Sopenharmony_ci 3670bf215546Sopenharmony_ci// Common subexpression elimination. Stupid O^2 implementation. 3671bf215546Sopenharmony_ciclass LocalCSE : public Pass 3672bf215546Sopenharmony_ci{ 3673bf215546Sopenharmony_ciprivate: 3674bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 3675bf215546Sopenharmony_ci 3676bf215546Sopenharmony_ci inline bool tryReplace(Instruction **, Instruction *); 3677bf215546Sopenharmony_ci 3678bf215546Sopenharmony_ci DLList ops[OP_LAST + 1]; 3679bf215546Sopenharmony_ci}; 3680bf215546Sopenharmony_ci 3681bf215546Sopenharmony_ciclass GlobalCSE : public Pass 3682bf215546Sopenharmony_ci{ 3683bf215546Sopenharmony_ciprivate: 3684bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 3685bf215546Sopenharmony_ci}; 3686bf215546Sopenharmony_ci 3687bf215546Sopenharmony_cibool 3688bf215546Sopenharmony_ciInstruction::isActionEqual(const Instruction *that) const 3689bf215546Sopenharmony_ci{ 3690bf215546Sopenharmony_ci if (this->op != that->op || 3691bf215546Sopenharmony_ci this->dType != that->dType || 3692bf215546Sopenharmony_ci this->sType != that->sType) 3693bf215546Sopenharmony_ci return false; 3694bf215546Sopenharmony_ci if (this->cc != that->cc) 3695bf215546Sopenharmony_ci return false; 3696bf215546Sopenharmony_ci 3697bf215546Sopenharmony_ci if (this->asTex()) { 3698bf215546Sopenharmony_ci if (memcmp(&this->asTex()->tex, 3699bf215546Sopenharmony_ci &that->asTex()->tex, 3700bf215546Sopenharmony_ci sizeof(this->asTex()->tex))) 3701bf215546Sopenharmony_ci return false; 3702bf215546Sopenharmony_ci } else 3703bf215546Sopenharmony_ci if (this->asCmp()) { 3704bf215546Sopenharmony_ci if (this->asCmp()->setCond != that->asCmp()->setCond) 3705bf215546Sopenharmony_ci return false; 3706bf215546Sopenharmony_ci } else 3707bf215546Sopenharmony_ci if (this->asFlow()) { 3708bf215546Sopenharmony_ci return false; 3709bf215546Sopenharmony_ci } else 3710bf215546Sopenharmony_ci if (this->op == OP_PHI && this->bb != that->bb) { 3711bf215546Sopenharmony_ci /* TODO: we could probably be a bit smarter here by following the 3712bf215546Sopenharmony_ci * control flow, but honestly, it is quite painful to check */ 3713bf215546Sopenharmony_ci return false; 3714bf215546Sopenharmony_ci } else { 3715bf215546Sopenharmony_ci if (this->ipa != that->ipa || 3716bf215546Sopenharmony_ci this->lanes != that->lanes || 3717bf215546Sopenharmony_ci this->perPatch != that->perPatch) 3718bf215546Sopenharmony_ci return false; 3719bf215546Sopenharmony_ci if (this->postFactor != that->postFactor) 3720bf215546Sopenharmony_ci return false; 3721bf215546Sopenharmony_ci } 3722bf215546Sopenharmony_ci 3723bf215546Sopenharmony_ci if (this->subOp != that->subOp || 3724bf215546Sopenharmony_ci this->saturate != that->saturate || 3725bf215546Sopenharmony_ci this->rnd != that->rnd || 3726bf215546Sopenharmony_ci this->ftz != that->ftz || 3727bf215546Sopenharmony_ci this->dnz != that->dnz || 3728bf215546Sopenharmony_ci this->cache != that->cache || 3729bf215546Sopenharmony_ci this->mask != that->mask) 3730bf215546Sopenharmony_ci return false; 3731bf215546Sopenharmony_ci 3732bf215546Sopenharmony_ci return true; 3733bf215546Sopenharmony_ci} 3734bf215546Sopenharmony_ci 3735bf215546Sopenharmony_cibool 3736bf215546Sopenharmony_ciInstruction::isResultEqual(const Instruction *that) const 3737bf215546Sopenharmony_ci{ 3738bf215546Sopenharmony_ci unsigned int d, s; 3739bf215546Sopenharmony_ci 3740bf215546Sopenharmony_ci // NOTE: location of discard only affects tex with liveOnly and quadops 3741bf215546Sopenharmony_ci if (!this->defExists(0) && this->op != OP_DISCARD) 3742bf215546Sopenharmony_ci return false; 3743bf215546Sopenharmony_ci 3744bf215546Sopenharmony_ci if (!isActionEqual(that)) 3745bf215546Sopenharmony_ci return false; 3746bf215546Sopenharmony_ci 3747bf215546Sopenharmony_ci if (this->predSrc != that->predSrc) 3748bf215546Sopenharmony_ci return false; 3749bf215546Sopenharmony_ci 3750bf215546Sopenharmony_ci for (d = 0; this->defExists(d); ++d) { 3751bf215546Sopenharmony_ci if (!that->defExists(d) || 3752bf215546Sopenharmony_ci !this->getDef(d)->equals(that->getDef(d), false)) 3753bf215546Sopenharmony_ci return false; 3754bf215546Sopenharmony_ci } 3755bf215546Sopenharmony_ci if (that->defExists(d)) 3756bf215546Sopenharmony_ci return false; 3757bf215546Sopenharmony_ci 3758bf215546Sopenharmony_ci for (s = 0; this->srcExists(s); ++s) { 3759bf215546Sopenharmony_ci if (!that->srcExists(s)) 3760bf215546Sopenharmony_ci return false; 3761bf215546Sopenharmony_ci if (this->src(s).mod != that->src(s).mod) 3762bf215546Sopenharmony_ci return false; 3763bf215546Sopenharmony_ci if (!this->getSrc(s)->equals(that->getSrc(s), true)) 3764bf215546Sopenharmony_ci return false; 3765bf215546Sopenharmony_ci } 3766bf215546Sopenharmony_ci if (that->srcExists(s)) 3767bf215546Sopenharmony_ci return false; 3768bf215546Sopenharmony_ci 3769bf215546Sopenharmony_ci if (op == OP_LOAD || op == OP_VFETCH || op == OP_ATOM) { 3770bf215546Sopenharmony_ci switch (src(0).getFile()) { 3771bf215546Sopenharmony_ci case FILE_MEMORY_CONST: 3772bf215546Sopenharmony_ci case FILE_SHADER_INPUT: 3773bf215546Sopenharmony_ci return true; 3774bf215546Sopenharmony_ci case FILE_SHADER_OUTPUT: 3775bf215546Sopenharmony_ci return bb->getProgram()->getType() == Program::TYPE_TESSELLATION_EVAL; 3776bf215546Sopenharmony_ci default: 3777bf215546Sopenharmony_ci return false; 3778bf215546Sopenharmony_ci } 3779bf215546Sopenharmony_ci } 3780bf215546Sopenharmony_ci 3781bf215546Sopenharmony_ci return true; 3782bf215546Sopenharmony_ci} 3783bf215546Sopenharmony_ci 3784bf215546Sopenharmony_ci// pull through common expressions from different in-blocks 3785bf215546Sopenharmony_cibool 3786bf215546Sopenharmony_ciGlobalCSE::visit(BasicBlock *bb) 3787bf215546Sopenharmony_ci{ 3788bf215546Sopenharmony_ci Instruction *phi, *next, *ik; 3789bf215546Sopenharmony_ci int s; 3790bf215546Sopenharmony_ci 3791bf215546Sopenharmony_ci // TODO: maybe do this with OP_UNION, too 3792bf215546Sopenharmony_ci 3793bf215546Sopenharmony_ci for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = next) { 3794bf215546Sopenharmony_ci next = phi->next; 3795bf215546Sopenharmony_ci if (phi->getSrc(0)->refCount() > 1) 3796bf215546Sopenharmony_ci continue; 3797bf215546Sopenharmony_ci ik = phi->getSrc(0)->getInsn(); 3798bf215546Sopenharmony_ci if (!ik) 3799bf215546Sopenharmony_ci continue; // probably a function input 3800bf215546Sopenharmony_ci if (ik->defCount(0xff) > 1) 3801bf215546Sopenharmony_ci continue; // too painful to check if we can really push this forward 3802bf215546Sopenharmony_ci for (s = 1; phi->srcExists(s); ++s) { 3803bf215546Sopenharmony_ci if (phi->getSrc(s)->refCount() > 1) 3804bf215546Sopenharmony_ci break; 3805bf215546Sopenharmony_ci if (!phi->getSrc(s)->getInsn() || 3806bf215546Sopenharmony_ci !phi->getSrc(s)->getInsn()->isResultEqual(ik)) 3807bf215546Sopenharmony_ci break; 3808bf215546Sopenharmony_ci } 3809bf215546Sopenharmony_ci if (!phi->srcExists(s)) { 3810bf215546Sopenharmony_ci assert(ik->op != OP_PHI); 3811bf215546Sopenharmony_ci Instruction *entry = bb->getEntry(); 3812bf215546Sopenharmony_ci ik->bb->remove(ik); 3813bf215546Sopenharmony_ci if (!entry || entry->op != OP_JOIN) 3814bf215546Sopenharmony_ci bb->insertHead(ik); 3815bf215546Sopenharmony_ci else 3816bf215546Sopenharmony_ci bb->insertAfter(entry, ik); 3817bf215546Sopenharmony_ci ik->setDef(0, phi->getDef(0)); 3818bf215546Sopenharmony_ci delete_Instruction(prog, phi); 3819bf215546Sopenharmony_ci } 3820bf215546Sopenharmony_ci } 3821bf215546Sopenharmony_ci 3822bf215546Sopenharmony_ci return true; 3823bf215546Sopenharmony_ci} 3824bf215546Sopenharmony_ci 3825bf215546Sopenharmony_cibool 3826bf215546Sopenharmony_ciLocalCSE::tryReplace(Instruction **ptr, Instruction *i) 3827bf215546Sopenharmony_ci{ 3828bf215546Sopenharmony_ci Instruction *old = *ptr; 3829bf215546Sopenharmony_ci 3830bf215546Sopenharmony_ci // TODO: maybe relax this later (causes trouble with OP_UNION) 3831bf215546Sopenharmony_ci if (i->isPredicated()) 3832bf215546Sopenharmony_ci return false; 3833bf215546Sopenharmony_ci 3834bf215546Sopenharmony_ci if (!old->isResultEqual(i)) 3835bf215546Sopenharmony_ci return false; 3836bf215546Sopenharmony_ci 3837bf215546Sopenharmony_ci for (int d = 0; old->defExists(d); ++d) 3838bf215546Sopenharmony_ci old->def(d).replace(i->getDef(d), false); 3839bf215546Sopenharmony_ci delete_Instruction(prog, old); 3840bf215546Sopenharmony_ci *ptr = NULL; 3841bf215546Sopenharmony_ci return true; 3842bf215546Sopenharmony_ci} 3843bf215546Sopenharmony_ci 3844bf215546Sopenharmony_cibool 3845bf215546Sopenharmony_ciLocalCSE::visit(BasicBlock *bb) 3846bf215546Sopenharmony_ci{ 3847bf215546Sopenharmony_ci unsigned int replaced; 3848bf215546Sopenharmony_ci 3849bf215546Sopenharmony_ci do { 3850bf215546Sopenharmony_ci Instruction *ir, *next; 3851bf215546Sopenharmony_ci 3852bf215546Sopenharmony_ci replaced = 0; 3853bf215546Sopenharmony_ci 3854bf215546Sopenharmony_ci // will need to know the order of instructions 3855bf215546Sopenharmony_ci int serial = 0; 3856bf215546Sopenharmony_ci for (ir = bb->getFirst(); ir; ir = ir->next) 3857bf215546Sopenharmony_ci ir->serial = serial++; 3858bf215546Sopenharmony_ci 3859bf215546Sopenharmony_ci for (ir = bb->getFirst(); ir; ir = next) { 3860bf215546Sopenharmony_ci int s; 3861bf215546Sopenharmony_ci Value *src = NULL; 3862bf215546Sopenharmony_ci 3863bf215546Sopenharmony_ci next = ir->next; 3864bf215546Sopenharmony_ci 3865bf215546Sopenharmony_ci if (ir->fixed) { 3866bf215546Sopenharmony_ci ops[ir->op].insert(ir); 3867bf215546Sopenharmony_ci continue; 3868bf215546Sopenharmony_ci } 3869bf215546Sopenharmony_ci 3870bf215546Sopenharmony_ci for (s = 0; ir->srcExists(s); ++s) 3871bf215546Sopenharmony_ci if (ir->getSrc(s)->asLValue()) 3872bf215546Sopenharmony_ci if (!src || ir->getSrc(s)->refCount() < src->refCount()) 3873bf215546Sopenharmony_ci src = ir->getSrc(s); 3874bf215546Sopenharmony_ci 3875bf215546Sopenharmony_ci if (src) { 3876bf215546Sopenharmony_ci for (Value::UseIterator it = src->uses.begin(); 3877bf215546Sopenharmony_ci it != src->uses.end(); ++it) { 3878bf215546Sopenharmony_ci Instruction *ik = (*it)->getInsn(); 3879bf215546Sopenharmony_ci if (ik && ik->bb == ir->bb && ik->serial < ir->serial) 3880bf215546Sopenharmony_ci if (tryReplace(&ir, ik)) 3881bf215546Sopenharmony_ci break; 3882bf215546Sopenharmony_ci } 3883bf215546Sopenharmony_ci } else { 3884bf215546Sopenharmony_ci DLLIST_FOR_EACH(&ops[ir->op], iter) 3885bf215546Sopenharmony_ci { 3886bf215546Sopenharmony_ci Instruction *ik = reinterpret_cast<Instruction *>(iter.get()); 3887bf215546Sopenharmony_ci if (tryReplace(&ir, ik)) 3888bf215546Sopenharmony_ci break; 3889bf215546Sopenharmony_ci } 3890bf215546Sopenharmony_ci } 3891bf215546Sopenharmony_ci 3892bf215546Sopenharmony_ci if (ir) 3893bf215546Sopenharmony_ci ops[ir->op].insert(ir); 3894bf215546Sopenharmony_ci else 3895bf215546Sopenharmony_ci ++replaced; 3896bf215546Sopenharmony_ci } 3897bf215546Sopenharmony_ci for (unsigned int i = 0; i <= OP_LAST; ++i) 3898bf215546Sopenharmony_ci ops[i].clear(); 3899bf215546Sopenharmony_ci 3900bf215546Sopenharmony_ci } while (replaced); 3901bf215546Sopenharmony_ci 3902bf215546Sopenharmony_ci return true; 3903bf215546Sopenharmony_ci} 3904bf215546Sopenharmony_ci 3905bf215546Sopenharmony_ci// ============================================================================= 3906bf215546Sopenharmony_ci 3907bf215546Sopenharmony_ci// Remove computations of unused values. 3908bf215546Sopenharmony_ciclass DeadCodeElim : public Pass 3909bf215546Sopenharmony_ci{ 3910bf215546Sopenharmony_cipublic: 3911bf215546Sopenharmony_ci DeadCodeElim() : deadCount(0) {} 3912bf215546Sopenharmony_ci bool buryAll(Program *); 3913bf215546Sopenharmony_ci 3914bf215546Sopenharmony_ciprivate: 3915bf215546Sopenharmony_ci virtual bool visit(BasicBlock *); 3916bf215546Sopenharmony_ci 3917bf215546Sopenharmony_ci void checkSplitLoad(Instruction *ld); // for partially dead loads 3918bf215546Sopenharmony_ci 3919bf215546Sopenharmony_ci unsigned int deadCount; 3920bf215546Sopenharmony_ci}; 3921bf215546Sopenharmony_ci 3922bf215546Sopenharmony_cibool 3923bf215546Sopenharmony_ciDeadCodeElim::buryAll(Program *prog) 3924bf215546Sopenharmony_ci{ 3925bf215546Sopenharmony_ci do { 3926bf215546Sopenharmony_ci deadCount = 0; 3927bf215546Sopenharmony_ci if (!this->run(prog, false, false)) 3928bf215546Sopenharmony_ci return false; 3929bf215546Sopenharmony_ci } while (deadCount); 3930bf215546Sopenharmony_ci 3931bf215546Sopenharmony_ci return true; 3932bf215546Sopenharmony_ci} 3933bf215546Sopenharmony_ci 3934bf215546Sopenharmony_cibool 3935bf215546Sopenharmony_ciDeadCodeElim::visit(BasicBlock *bb) 3936bf215546Sopenharmony_ci{ 3937bf215546Sopenharmony_ci Instruction *prev; 3938bf215546Sopenharmony_ci 3939bf215546Sopenharmony_ci for (Instruction *i = bb->getExit(); i; i = prev) { 3940bf215546Sopenharmony_ci prev = i->prev; 3941bf215546Sopenharmony_ci if (i->isDead()) { 3942bf215546Sopenharmony_ci ++deadCount; 3943bf215546Sopenharmony_ci delete_Instruction(prog, i); 3944bf215546Sopenharmony_ci } else 3945bf215546Sopenharmony_ci if (i->defExists(1) && 3946bf215546Sopenharmony_ci i->subOp == 0 && 3947bf215546Sopenharmony_ci (i->op == OP_VFETCH || i->op == OP_LOAD)) { 3948bf215546Sopenharmony_ci checkSplitLoad(i); 3949bf215546Sopenharmony_ci } else 3950bf215546Sopenharmony_ci if (i->defExists(0) && !i->getDef(0)->refCount()) { 3951bf215546Sopenharmony_ci if (i->op == OP_ATOM || 3952bf215546Sopenharmony_ci i->op == OP_SUREDP || 3953bf215546Sopenharmony_ci i->op == OP_SUREDB) { 3954bf215546Sopenharmony_ci const Target *targ = prog->getTarget(); 3955bf215546Sopenharmony_ci if (targ->getChipset() >= NVISA_GF100_CHIPSET || 3956bf215546Sopenharmony_ci i->subOp != NV50_IR_SUBOP_ATOM_CAS) 3957bf215546Sopenharmony_ci i->setDef(0, NULL); 3958bf215546Sopenharmony_ci if (i->op == OP_ATOM && i->subOp == NV50_IR_SUBOP_ATOM_EXCH) { 3959bf215546Sopenharmony_ci i->cache = CACHE_CV; 3960bf215546Sopenharmony_ci i->op = OP_STORE; 3961bf215546Sopenharmony_ci i->subOp = 0; 3962bf215546Sopenharmony_ci } 3963bf215546Sopenharmony_ci } else if (i->op == OP_LOAD && i->subOp == NV50_IR_SUBOP_LOAD_LOCKED) { 3964bf215546Sopenharmony_ci i->setDef(0, i->getDef(1)); 3965bf215546Sopenharmony_ci i->setDef(1, NULL); 3966bf215546Sopenharmony_ci } 3967bf215546Sopenharmony_ci } 3968bf215546Sopenharmony_ci } 3969bf215546Sopenharmony_ci return true; 3970bf215546Sopenharmony_ci} 3971bf215546Sopenharmony_ci 3972bf215546Sopenharmony_ci// Each load can go into up to 4 destinations, any of which might potentially 3973bf215546Sopenharmony_ci// be dead (i.e. a hole). These can always be split into 2 loads, independent 3974bf215546Sopenharmony_ci// of where the holes are. We find the first contiguous region, put it into 3975bf215546Sopenharmony_ci// the first load, and then put the second contiguous region into the second 3976bf215546Sopenharmony_ci// load. There can be at most 2 contiguous regions. 3977bf215546Sopenharmony_ci// 3978bf215546Sopenharmony_ci// Note that there are some restrictions, for example it's not possible to do 3979bf215546Sopenharmony_ci// a 64-bit load that's not 64-bit aligned, so such a load has to be split 3980bf215546Sopenharmony_ci// up. Also hardware doesn't support 96-bit loads, so those also have to be 3981bf215546Sopenharmony_ci// split into a 64-bit and 32-bit load. 3982bf215546Sopenharmony_civoid 3983bf215546Sopenharmony_ciDeadCodeElim::checkSplitLoad(Instruction *ld1) 3984bf215546Sopenharmony_ci{ 3985bf215546Sopenharmony_ci Instruction *ld2 = NULL; // can get at most 2 loads 3986bf215546Sopenharmony_ci Value *def1[4]; 3987bf215546Sopenharmony_ci Value *def2[4]; 3988bf215546Sopenharmony_ci int32_t addr1, addr2; 3989bf215546Sopenharmony_ci int32_t size1, size2; 3990bf215546Sopenharmony_ci int d, n1, n2; 3991bf215546Sopenharmony_ci uint32_t mask = 0xffffffff; 3992bf215546Sopenharmony_ci 3993bf215546Sopenharmony_ci for (d = 0; ld1->defExists(d); ++d) 3994bf215546Sopenharmony_ci if (!ld1->getDef(d)->refCount() && ld1->getDef(d)->reg.data.id < 0) 3995bf215546Sopenharmony_ci mask &= ~(1 << d); 3996bf215546Sopenharmony_ci if (mask == 0xffffffff) 3997bf215546Sopenharmony_ci return; 3998bf215546Sopenharmony_ci 3999bf215546Sopenharmony_ci addr1 = ld1->getSrc(0)->reg.data.offset; 4000bf215546Sopenharmony_ci n1 = n2 = 0; 4001bf215546Sopenharmony_ci size1 = size2 = 0; 4002bf215546Sopenharmony_ci 4003bf215546Sopenharmony_ci // Compute address/width for first load 4004bf215546Sopenharmony_ci for (d = 0; ld1->defExists(d); ++d) { 4005bf215546Sopenharmony_ci if (mask & (1 << d)) { 4006bf215546Sopenharmony_ci if (size1 && (addr1 & 0x7)) 4007bf215546Sopenharmony_ci break; 4008bf215546Sopenharmony_ci def1[n1] = ld1->getDef(d); 4009bf215546Sopenharmony_ci size1 += def1[n1++]->reg.size; 4010bf215546Sopenharmony_ci } else 4011bf215546Sopenharmony_ci if (!n1) { 4012bf215546Sopenharmony_ci addr1 += ld1->getDef(d)->reg.size; 4013bf215546Sopenharmony_ci } else { 4014bf215546Sopenharmony_ci break; 4015bf215546Sopenharmony_ci } 4016bf215546Sopenharmony_ci } 4017bf215546Sopenharmony_ci 4018bf215546Sopenharmony_ci // Scale back the size of the first load until it can be loaded. This 4019bf215546Sopenharmony_ci // typically happens for TYPE_B96 loads. 4020bf215546Sopenharmony_ci while (n1 && 4021bf215546Sopenharmony_ci !prog->getTarget()->isAccessSupported(ld1->getSrc(0)->reg.file, 4022bf215546Sopenharmony_ci typeOfSize(size1))) { 4023bf215546Sopenharmony_ci size1 -= def1[--n1]->reg.size; 4024bf215546Sopenharmony_ci d--; 4025bf215546Sopenharmony_ci } 4026bf215546Sopenharmony_ci 4027bf215546Sopenharmony_ci // Compute address/width for second load 4028bf215546Sopenharmony_ci for (addr2 = addr1 + size1; ld1->defExists(d); ++d) { 4029bf215546Sopenharmony_ci if (mask & (1 << d)) { 4030bf215546Sopenharmony_ci assert(!size2 || !(addr2 & 0x7)); 4031bf215546Sopenharmony_ci def2[n2] = ld1->getDef(d); 4032bf215546Sopenharmony_ci size2 += def2[n2++]->reg.size; 4033bf215546Sopenharmony_ci } else if (!n2) { 4034bf215546Sopenharmony_ci assert(!n2); 4035bf215546Sopenharmony_ci addr2 += ld1->getDef(d)->reg.size; 4036bf215546Sopenharmony_ci } else { 4037bf215546Sopenharmony_ci break; 4038bf215546Sopenharmony_ci } 4039bf215546Sopenharmony_ci } 4040bf215546Sopenharmony_ci 4041bf215546Sopenharmony_ci // Make sure that we've processed all the values 4042bf215546Sopenharmony_ci for (; ld1->defExists(d); ++d) 4043bf215546Sopenharmony_ci assert(!(mask & (1 << d))); 4044bf215546Sopenharmony_ci 4045bf215546Sopenharmony_ci updateLdStOffset(ld1, addr1, func); 4046bf215546Sopenharmony_ci ld1->setType(typeOfSize(size1)); 4047bf215546Sopenharmony_ci for (d = 0; d < 4; ++d) 4048bf215546Sopenharmony_ci ld1->setDef(d, (d < n1) ? def1[d] : NULL); 4049bf215546Sopenharmony_ci 4050bf215546Sopenharmony_ci if (!n2) 4051bf215546Sopenharmony_ci return; 4052bf215546Sopenharmony_ci 4053bf215546Sopenharmony_ci ld2 = cloneShallow(func, ld1); 4054bf215546Sopenharmony_ci updateLdStOffset(ld2, addr2, func); 4055bf215546Sopenharmony_ci ld2->setType(typeOfSize(size2)); 4056bf215546Sopenharmony_ci for (d = 0; d < 4; ++d) 4057bf215546Sopenharmony_ci ld2->setDef(d, (d < n2) ? def2[d] : NULL); 4058bf215546Sopenharmony_ci 4059bf215546Sopenharmony_ci ld1->bb->insertAfter(ld1, ld2); 4060bf215546Sopenharmony_ci} 4061bf215546Sopenharmony_ci 4062bf215546Sopenharmony_ci// ============================================================================= 4063bf215546Sopenharmony_ci 4064bf215546Sopenharmony_ci#define RUN_PASS(l, n, f) \ 4065bf215546Sopenharmony_ci if (level >= (l)) { \ 4066bf215546Sopenharmony_ci if (dbgFlags & NV50_IR_DEBUG_VERBOSE) \ 4067bf215546Sopenharmony_ci INFO("PEEPHOLE: %s\n", #n); \ 4068bf215546Sopenharmony_ci n pass; \ 4069bf215546Sopenharmony_ci if (!pass.f(this)) \ 4070bf215546Sopenharmony_ci return false; \ 4071bf215546Sopenharmony_ci } 4072bf215546Sopenharmony_ci 4073bf215546Sopenharmony_cibool 4074bf215546Sopenharmony_ciProgram::optimizeSSA(int level) 4075bf215546Sopenharmony_ci{ 4076bf215546Sopenharmony_ci RUN_PASS(1, DeadCodeElim, buryAll); 4077bf215546Sopenharmony_ci RUN_PASS(1, CopyPropagation, run); 4078bf215546Sopenharmony_ci RUN_PASS(1, MergeSplits, run); 4079bf215546Sopenharmony_ci RUN_PASS(2, GlobalCSE, run); 4080bf215546Sopenharmony_ci RUN_PASS(1, LocalCSE, run); 4081bf215546Sopenharmony_ci RUN_PASS(2, AlgebraicOpt, run); 4082bf215546Sopenharmony_ci RUN_PASS(2, ModifierFolding, run); // before load propagation -> less checks 4083bf215546Sopenharmony_ci RUN_PASS(1, ConstantFolding, foldAll); 4084bf215546Sopenharmony_ci RUN_PASS(0, Split64BitOpPreRA, run); 4085bf215546Sopenharmony_ci RUN_PASS(2, LateAlgebraicOpt, run); 4086bf215546Sopenharmony_ci RUN_PASS(1, LoadPropagation, run); 4087bf215546Sopenharmony_ci RUN_PASS(1, IndirectPropagation, run); 4088bf215546Sopenharmony_ci RUN_PASS(2, MemoryOpt, run); 4089bf215546Sopenharmony_ci RUN_PASS(2, LocalCSE, run); 4090bf215546Sopenharmony_ci RUN_PASS(0, DeadCodeElim, buryAll); 4091bf215546Sopenharmony_ci 4092bf215546Sopenharmony_ci return true; 4093bf215546Sopenharmony_ci} 4094bf215546Sopenharmony_ci 4095bf215546Sopenharmony_cibool 4096bf215546Sopenharmony_ciProgram::optimizePostRA(int level) 4097bf215546Sopenharmony_ci{ 4098bf215546Sopenharmony_ci RUN_PASS(2, FlatteningPass, run); 4099bf215546Sopenharmony_ci RUN_PASS(2, PostRaLoadPropagation, run); 4100bf215546Sopenharmony_ci 4101bf215546Sopenharmony_ci return true; 4102bf215546Sopenharmony_ci} 4103bf215546Sopenharmony_ci 4104bf215546Sopenharmony_ci} 4105