1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2011 Christoph Bumiller 3bf215546Sopenharmony_ci * 2014 Red Hat Inc. 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in 13bf215546Sopenharmony_ci * all copies or substantial portions of the Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 19bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 21bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "nv50_ir_target_gm107.h" 25bf215546Sopenharmony_ci#include "nv50_ir_lowering_gm107.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_cinamespace nv50_ir { 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ciTarget *getTargetGM107(unsigned int chipset) 30bf215546Sopenharmony_ci{ 31bf215546Sopenharmony_ci return new TargetGM107(chipset); 32bf215546Sopenharmony_ci} 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci// BULTINS / LIBRARY FUNCTIONS: 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci// lazyness -> will just hardcode everything for the time being 37bf215546Sopenharmony_ci 38bf215546Sopenharmony_ci#include "lib/gm107.asm.h" 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_civoid 41bf215546Sopenharmony_ciTargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci *code = (const uint32_t *)&gm107_builtin_code[0]; 44bf215546Sopenharmony_ci *size = sizeof(gm107_builtin_code); 45bf215546Sopenharmony_ci} 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ciuint32_t 48bf215546Sopenharmony_ciTargetGM107::getBuiltinOffset(int builtin) const 49bf215546Sopenharmony_ci{ 50bf215546Sopenharmony_ci assert(builtin < NVC0_BUILTIN_COUNT); 51bf215546Sopenharmony_ci return gm107_builtin_offsets[builtin]; 52bf215546Sopenharmony_ci} 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_cibool 55bf215546Sopenharmony_ciTargetGM107::isOpSupported(operation op, DataType ty) const 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci switch (op) { 58bf215546Sopenharmony_ci case OP_SAD: 59bf215546Sopenharmony_ci case OP_POW: 60bf215546Sopenharmony_ci case OP_DIV: 61bf215546Sopenharmony_ci case OP_MOD: 62bf215546Sopenharmony_ci return false; 63bf215546Sopenharmony_ci case OP_SQRT: 64bf215546Sopenharmony_ci if (ty == TYPE_F64) 65bf215546Sopenharmony_ci return false; 66bf215546Sopenharmony_ci return chipset >= NVISA_GM200_CHIPSET; 67bf215546Sopenharmony_ci case OP_XMAD: 68bf215546Sopenharmony_ci if (isFloatType(ty)) 69bf215546Sopenharmony_ci return false; 70bf215546Sopenharmony_ci break; 71bf215546Sopenharmony_ci default: 72bf215546Sopenharmony_ci break; 73bf215546Sopenharmony_ci } 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci return true; 76bf215546Sopenharmony_ci} 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci// Return true when an instruction supports the reuse flag. When supported, the 79bf215546Sopenharmony_ci// hardware will use the operand reuse cache introduced since Maxwell, which 80bf215546Sopenharmony_ci// should try to reduce bank conflicts by caching values for the subsequent 81bf215546Sopenharmony_ci// instructions. Note that the next instructions have to use the same GPR id in 82bf215546Sopenharmony_ci// the same operand slot. 83bf215546Sopenharmony_cibool 84bf215546Sopenharmony_ciTargetGM107::isReuseSupported(const Instruction *insn) const 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci const OpClass cl = getOpClass(insn->op); 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci // TODO: double-check! 89bf215546Sopenharmony_ci switch (cl) { 90bf215546Sopenharmony_ci case OPCLASS_ARITH: 91bf215546Sopenharmony_ci case OPCLASS_COMPARE: 92bf215546Sopenharmony_ci case OPCLASS_LOGIC: 93bf215546Sopenharmony_ci case OPCLASS_MOVE: 94bf215546Sopenharmony_ci case OPCLASS_SHIFT: 95bf215546Sopenharmony_ci return true; 96bf215546Sopenharmony_ci case OPCLASS_BITFIELD: 97bf215546Sopenharmony_ci if (insn->op == OP_INSBF || insn->op == OP_EXTBF) 98bf215546Sopenharmony_ci return true; 99bf215546Sopenharmony_ci break; 100bf215546Sopenharmony_ci default: 101bf215546Sopenharmony_ci break; 102bf215546Sopenharmony_ci } 103bf215546Sopenharmony_ci return false; 104bf215546Sopenharmony_ci} 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci// Return true when an instruction requires to set up a barrier because it 107bf215546Sopenharmony_ci// doesn't operate at a fixed latency. Variable latency instructions are memory 108bf215546Sopenharmony_ci// operations, double precision operations, special function unit operations 109bf215546Sopenharmony_ci// and other low throughput instructions. 110bf215546Sopenharmony_cibool 111bf215546Sopenharmony_ciTargetGM107::isBarrierRequired(const Instruction *insn) const 112bf215546Sopenharmony_ci{ 113bf215546Sopenharmony_ci const OpClass cl = getOpClass(insn->op); 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64) 116bf215546Sopenharmony_ci return true; 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci switch (cl) { 119bf215546Sopenharmony_ci case OPCLASS_ATOMIC: 120bf215546Sopenharmony_ci case OPCLASS_LOAD: 121bf215546Sopenharmony_ci case OPCLASS_STORE: 122bf215546Sopenharmony_ci case OPCLASS_SURFACE: 123bf215546Sopenharmony_ci case OPCLASS_TEXTURE: 124bf215546Sopenharmony_ci return true; 125bf215546Sopenharmony_ci case OPCLASS_SFU: 126bf215546Sopenharmony_ci switch (insn->op) { 127bf215546Sopenharmony_ci case OP_COS: 128bf215546Sopenharmony_ci case OP_EX2: 129bf215546Sopenharmony_ci case OP_LG2: 130bf215546Sopenharmony_ci case OP_LINTERP: 131bf215546Sopenharmony_ci case OP_PINTERP: 132bf215546Sopenharmony_ci case OP_RCP: 133bf215546Sopenharmony_ci case OP_RSQ: 134bf215546Sopenharmony_ci case OP_SIN: 135bf215546Sopenharmony_ci case OP_SQRT: 136bf215546Sopenharmony_ci return true; 137bf215546Sopenharmony_ci default: 138bf215546Sopenharmony_ci break; 139bf215546Sopenharmony_ci } 140bf215546Sopenharmony_ci break; 141bf215546Sopenharmony_ci case OPCLASS_BITFIELD: 142bf215546Sopenharmony_ci switch (insn->op) { 143bf215546Sopenharmony_ci case OP_BFIND: 144bf215546Sopenharmony_ci case OP_POPCNT: 145bf215546Sopenharmony_ci return true; 146bf215546Sopenharmony_ci default: 147bf215546Sopenharmony_ci break; 148bf215546Sopenharmony_ci } 149bf215546Sopenharmony_ci break; 150bf215546Sopenharmony_ci case OPCLASS_CONTROL: 151bf215546Sopenharmony_ci switch (insn->op) { 152bf215546Sopenharmony_ci case OP_EMIT: 153bf215546Sopenharmony_ci case OP_RESTART: 154bf215546Sopenharmony_ci return true; 155bf215546Sopenharmony_ci default: 156bf215546Sopenharmony_ci break; 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci break; 159bf215546Sopenharmony_ci case OPCLASS_OTHER: 160bf215546Sopenharmony_ci switch (insn->op) { 161bf215546Sopenharmony_ci case OP_AFETCH: 162bf215546Sopenharmony_ci case OP_PFETCH: 163bf215546Sopenharmony_ci case OP_PIXLD: 164bf215546Sopenharmony_ci case OP_SHFL: 165bf215546Sopenharmony_ci return true; 166bf215546Sopenharmony_ci case OP_RDSV: 167bf215546Sopenharmony_ci return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv); 168bf215546Sopenharmony_ci default: 169bf215546Sopenharmony_ci break; 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci break; 172bf215546Sopenharmony_ci case OPCLASS_ARITH: 173bf215546Sopenharmony_ci if ((insn->op == OP_MUL || insn->op == OP_MAD) && 174bf215546Sopenharmony_ci !isFloatType(insn->dType)) 175bf215546Sopenharmony_ci return true; 176bf215546Sopenharmony_ci break; 177bf215546Sopenharmony_ci case OPCLASS_CONVERT: 178bf215546Sopenharmony_ci if (insn->def(0).getFile() != FILE_PREDICATE && 179bf215546Sopenharmony_ci insn->src(0).getFile() != FILE_PREDICATE) 180bf215546Sopenharmony_ci return true; 181bf215546Sopenharmony_ci break; 182bf215546Sopenharmony_ci default: 183bf215546Sopenharmony_ci break; 184bf215546Sopenharmony_ci } 185bf215546Sopenharmony_ci return false; 186bf215546Sopenharmony_ci} 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_cibool 189bf215546Sopenharmony_ciTargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const 190bf215546Sopenharmony_ci{ 191bf215546Sopenharmony_ci // TODO 192bf215546Sopenharmony_ci return false; 193bf215546Sopenharmony_ci} 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_ci// Return the number of stall counts needed to complete a single instruction. 196bf215546Sopenharmony_ci// On Maxwell GPUs, the pipeline depth is 6, but some instructions require 197bf215546Sopenharmony_ci// different number of stall counts like memory operations. 198bf215546Sopenharmony_ciint 199bf215546Sopenharmony_ciTargetGM107::getLatency(const Instruction *insn) const 200bf215546Sopenharmony_ci{ 201bf215546Sopenharmony_ci // TODO: better values! This should be good enough for now though. 202bf215546Sopenharmony_ci switch (insn->op) { 203bf215546Sopenharmony_ci case OP_EMIT: 204bf215546Sopenharmony_ci case OP_EXPORT: 205bf215546Sopenharmony_ci case OP_PIXLD: 206bf215546Sopenharmony_ci case OP_RESTART: 207bf215546Sopenharmony_ci case OP_STORE: 208bf215546Sopenharmony_ci case OP_SUSTB: 209bf215546Sopenharmony_ci case OP_SUSTP: 210bf215546Sopenharmony_ci return 1; 211bf215546Sopenharmony_ci case OP_SHFL: 212bf215546Sopenharmony_ci return 2; 213bf215546Sopenharmony_ci case OP_ADD: 214bf215546Sopenharmony_ci case OP_AND: 215bf215546Sopenharmony_ci case OP_EXTBF: 216bf215546Sopenharmony_ci case OP_FMA: 217bf215546Sopenharmony_ci case OP_INSBF: 218bf215546Sopenharmony_ci case OP_MAD: 219bf215546Sopenharmony_ci case OP_MAX: 220bf215546Sopenharmony_ci case OP_MIN: 221bf215546Sopenharmony_ci case OP_MOV: 222bf215546Sopenharmony_ci case OP_MUL: 223bf215546Sopenharmony_ci case OP_NOT: 224bf215546Sopenharmony_ci case OP_OR: 225bf215546Sopenharmony_ci case OP_PREEX2: 226bf215546Sopenharmony_ci case OP_PRESIN: 227bf215546Sopenharmony_ci case OP_QUADOP: 228bf215546Sopenharmony_ci case OP_SELP: 229bf215546Sopenharmony_ci case OP_SET: 230bf215546Sopenharmony_ci case OP_SET_AND: 231bf215546Sopenharmony_ci case OP_SET_OR: 232bf215546Sopenharmony_ci case OP_SET_XOR: 233bf215546Sopenharmony_ci case OP_SHL: 234bf215546Sopenharmony_ci case OP_SHLADD: 235bf215546Sopenharmony_ci case OP_SHR: 236bf215546Sopenharmony_ci case OP_SLCT: 237bf215546Sopenharmony_ci case OP_SUB: 238bf215546Sopenharmony_ci case OP_VOTE: 239bf215546Sopenharmony_ci case OP_XOR: 240bf215546Sopenharmony_ci case OP_XMAD: 241bf215546Sopenharmony_ci if (insn->dType != TYPE_F64) 242bf215546Sopenharmony_ci return 6; 243bf215546Sopenharmony_ci break; 244bf215546Sopenharmony_ci case OP_RDSV: 245bf215546Sopenharmony_ci return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15; 246bf215546Sopenharmony_ci case OP_ABS: 247bf215546Sopenharmony_ci case OP_CEIL: 248bf215546Sopenharmony_ci case OP_CVT: 249bf215546Sopenharmony_ci case OP_FLOOR: 250bf215546Sopenharmony_ci case OP_NEG: 251bf215546Sopenharmony_ci case OP_SAT: 252bf215546Sopenharmony_ci case OP_TRUNC: 253bf215546Sopenharmony_ci if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE || 254bf215546Sopenharmony_ci insn->src(0).getFile() == FILE_PREDICATE)) 255bf215546Sopenharmony_ci return 6; 256bf215546Sopenharmony_ci break; 257bf215546Sopenharmony_ci case OP_BFIND: 258bf215546Sopenharmony_ci case OP_COS: 259bf215546Sopenharmony_ci case OP_EX2: 260bf215546Sopenharmony_ci case OP_LG2: 261bf215546Sopenharmony_ci case OP_POPCNT: 262bf215546Sopenharmony_ci case OP_QUADON: 263bf215546Sopenharmony_ci case OP_QUADPOP: 264bf215546Sopenharmony_ci case OP_RCP: 265bf215546Sopenharmony_ci case OP_RSQ: 266bf215546Sopenharmony_ci case OP_SIN: 267bf215546Sopenharmony_ci case OP_SQRT: 268bf215546Sopenharmony_ci return 13; 269bf215546Sopenharmony_ci default: 270bf215546Sopenharmony_ci break; 271bf215546Sopenharmony_ci } 272bf215546Sopenharmony_ci // Use the maximum number of stall counts for other instructions. 273bf215546Sopenharmony_ci return 15; 274bf215546Sopenharmony_ci} 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci// Return the operand read latency which is the number of stall counts before 277bf215546Sopenharmony_ci// an instruction can read its sources. For memory operations like ATOM, LOAD 278bf215546Sopenharmony_ci// and STORE, the memory access has to be indirect. 279bf215546Sopenharmony_ciint 280bf215546Sopenharmony_ciTargetGM107::getReadLatency(const Instruction *insn) const 281bf215546Sopenharmony_ci{ 282bf215546Sopenharmony_ci switch (insn->op) { 283bf215546Sopenharmony_ci case OP_ABS: 284bf215546Sopenharmony_ci case OP_BFIND: 285bf215546Sopenharmony_ci case OP_CEIL: 286bf215546Sopenharmony_ci case OP_COS: 287bf215546Sopenharmony_ci case OP_EX2: 288bf215546Sopenharmony_ci case OP_FLOOR: 289bf215546Sopenharmony_ci case OP_LG2: 290bf215546Sopenharmony_ci case OP_NEG: 291bf215546Sopenharmony_ci case OP_POPCNT: 292bf215546Sopenharmony_ci case OP_RCP: 293bf215546Sopenharmony_ci case OP_RSQ: 294bf215546Sopenharmony_ci case OP_SAT: 295bf215546Sopenharmony_ci case OP_SIN: 296bf215546Sopenharmony_ci case OP_SQRT: 297bf215546Sopenharmony_ci case OP_SULDB: 298bf215546Sopenharmony_ci case OP_SULDP: 299bf215546Sopenharmony_ci case OP_SUREDB: 300bf215546Sopenharmony_ci case OP_SUREDP: 301bf215546Sopenharmony_ci case OP_SUSTB: 302bf215546Sopenharmony_ci case OP_SUSTP: 303bf215546Sopenharmony_ci case OP_TRUNC: 304bf215546Sopenharmony_ci return 4; 305bf215546Sopenharmony_ci case OP_CVT: 306bf215546Sopenharmony_ci if (insn->def(0).getFile() != FILE_PREDICATE && 307bf215546Sopenharmony_ci insn->src(0).getFile() != FILE_PREDICATE) 308bf215546Sopenharmony_ci return 4; 309bf215546Sopenharmony_ci break; 310bf215546Sopenharmony_ci case OP_ATOM: 311bf215546Sopenharmony_ci case OP_LOAD: 312bf215546Sopenharmony_ci case OP_STORE: 313bf215546Sopenharmony_ci if (insn->src(0).isIndirect(0)) { 314bf215546Sopenharmony_ci switch (insn->src(0).getFile()) { 315bf215546Sopenharmony_ci case FILE_MEMORY_SHARED: 316bf215546Sopenharmony_ci case FILE_MEMORY_CONST: 317bf215546Sopenharmony_ci return 2; 318bf215546Sopenharmony_ci case FILE_MEMORY_GLOBAL: 319bf215546Sopenharmony_ci case FILE_MEMORY_LOCAL: 320bf215546Sopenharmony_ci return 4; 321bf215546Sopenharmony_ci default: 322bf215546Sopenharmony_ci break; 323bf215546Sopenharmony_ci } 324bf215546Sopenharmony_ci } 325bf215546Sopenharmony_ci break; 326bf215546Sopenharmony_ci case OP_EXPORT: 327bf215546Sopenharmony_ci case OP_PFETCH: 328bf215546Sopenharmony_ci case OP_SHFL: 329bf215546Sopenharmony_ci case OP_VFETCH: 330bf215546Sopenharmony_ci return 2; 331bf215546Sopenharmony_ci default: 332bf215546Sopenharmony_ci break; 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci return 0; 335bf215546Sopenharmony_ci} 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_cibool 338bf215546Sopenharmony_ciTargetGM107::isCS2RSV(SVSemantic sv) const 339bf215546Sopenharmony_ci{ 340bf215546Sopenharmony_ci return sv == SV_CLOCK; 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_cibool 344bf215546Sopenharmony_ciTargetGM107::runLegalizePass(Program *prog, CGStage stage) const 345bf215546Sopenharmony_ci{ 346bf215546Sopenharmony_ci if (stage == CG_STAGE_PRE_SSA) { 347bf215546Sopenharmony_ci GM107LoweringPass pass(prog); 348bf215546Sopenharmony_ci return pass.run(prog, false, true); 349bf215546Sopenharmony_ci } else 350bf215546Sopenharmony_ci if (stage == CG_STAGE_POST_RA) { 351bf215546Sopenharmony_ci NVC0LegalizePostRA pass(prog); 352bf215546Sopenharmony_ci return pass.run(prog, false, true); 353bf215546Sopenharmony_ci } else 354bf215546Sopenharmony_ci if (stage == CG_STAGE_SSA) { 355bf215546Sopenharmony_ci GM107LegalizeSSA pass; 356bf215546Sopenharmony_ci return pass.run(prog, false, true); 357bf215546Sopenharmony_ci } 358bf215546Sopenharmony_ci return false; 359bf215546Sopenharmony_ci} 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ciCodeEmitter * 362bf215546Sopenharmony_ciTargetGM107::getCodeEmitter(Program::Type type) 363bf215546Sopenharmony_ci{ 364bf215546Sopenharmony_ci return createCodeEmitterGM107(type); 365bf215546Sopenharmony_ci} 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci} // namespace nv50_ir 368