1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2011 Christoph Bumiller
3bf215546Sopenharmony_ci *           2014 Red Hat Inc.
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
8bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
10bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
11bf215546Sopenharmony_ci *
12bf215546Sopenharmony_ci * The above copyright notice and this permission notice shall be included in
13bf215546Sopenharmony_ci * all copies or substantial portions of the Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19bf215546Sopenharmony_ci * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20bf215546Sopenharmony_ci * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21bf215546Sopenharmony_ci * OTHER DEALINGS IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "nv50_ir_target_gm107.h"
25bf215546Sopenharmony_ci#include "nv50_ir_lowering_gm107.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_cinamespace nv50_ir {
28bf215546Sopenharmony_ci
29bf215546Sopenharmony_ciTarget *getTargetGM107(unsigned int chipset)
30bf215546Sopenharmony_ci{
31bf215546Sopenharmony_ci   return new TargetGM107(chipset);
32bf215546Sopenharmony_ci}
33bf215546Sopenharmony_ci
// BUILTINS / LIBRARY FUNCTIONS:
35bf215546Sopenharmony_ci
// laziness -> will just hardcode everything for the time being
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_ci#include "lib/gm107.asm.h"
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_civoid
41bf215546Sopenharmony_ciTargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
42bf215546Sopenharmony_ci{
43bf215546Sopenharmony_ci   *code = (const uint32_t *)&gm107_builtin_code[0];
44bf215546Sopenharmony_ci   *size = sizeof(gm107_builtin_code);
45bf215546Sopenharmony_ci}
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ciuint32_t
48bf215546Sopenharmony_ciTargetGM107::getBuiltinOffset(int builtin) const
49bf215546Sopenharmony_ci{
50bf215546Sopenharmony_ci   assert(builtin < NVC0_BUILTIN_COUNT);
51bf215546Sopenharmony_ci   return gm107_builtin_offsets[builtin];
52bf215546Sopenharmony_ci}
53bf215546Sopenharmony_ci
54bf215546Sopenharmony_cibool
55bf215546Sopenharmony_ciTargetGM107::isOpSupported(operation op, DataType ty) const
56bf215546Sopenharmony_ci{
57bf215546Sopenharmony_ci   switch (op) {
58bf215546Sopenharmony_ci   case OP_SAD:
59bf215546Sopenharmony_ci   case OP_POW:
60bf215546Sopenharmony_ci   case OP_DIV:
61bf215546Sopenharmony_ci   case OP_MOD:
62bf215546Sopenharmony_ci      return false;
63bf215546Sopenharmony_ci   case OP_SQRT:
64bf215546Sopenharmony_ci      if (ty == TYPE_F64)
65bf215546Sopenharmony_ci         return false;
66bf215546Sopenharmony_ci      return chipset >= NVISA_GM200_CHIPSET;
67bf215546Sopenharmony_ci   case OP_XMAD:
68bf215546Sopenharmony_ci      if (isFloatType(ty))
69bf215546Sopenharmony_ci         return false;
70bf215546Sopenharmony_ci      break;
71bf215546Sopenharmony_ci   default:
72bf215546Sopenharmony_ci      break;
73bf215546Sopenharmony_ci   }
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci   return true;
76bf215546Sopenharmony_ci}
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci// Return true when an instruction supports the reuse flag. When supported, the
79bf215546Sopenharmony_ci// hardware will use the operand reuse cache introduced since Maxwell, which
80bf215546Sopenharmony_ci// should try to reduce bank conflicts by caching values for the subsequent
81bf215546Sopenharmony_ci// instructions. Note that the next instructions have to use the same GPR id in
82bf215546Sopenharmony_ci// the same operand slot.
83bf215546Sopenharmony_cibool
84bf215546Sopenharmony_ciTargetGM107::isReuseSupported(const Instruction *insn) const
85bf215546Sopenharmony_ci{
86bf215546Sopenharmony_ci   const OpClass cl = getOpClass(insn->op);
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci   // TODO: double-check!
89bf215546Sopenharmony_ci   switch (cl) {
90bf215546Sopenharmony_ci   case OPCLASS_ARITH:
91bf215546Sopenharmony_ci   case OPCLASS_COMPARE:
92bf215546Sopenharmony_ci   case OPCLASS_LOGIC:
93bf215546Sopenharmony_ci   case OPCLASS_MOVE:
94bf215546Sopenharmony_ci   case OPCLASS_SHIFT:
95bf215546Sopenharmony_ci      return true;
96bf215546Sopenharmony_ci   case OPCLASS_BITFIELD:
97bf215546Sopenharmony_ci      if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
98bf215546Sopenharmony_ci         return true;
99bf215546Sopenharmony_ci      break;
100bf215546Sopenharmony_ci   default:
101bf215546Sopenharmony_ci      break;
102bf215546Sopenharmony_ci   }
103bf215546Sopenharmony_ci   return false;
104bf215546Sopenharmony_ci}
105bf215546Sopenharmony_ci
106bf215546Sopenharmony_ci// Return true when an instruction requires to set up a barrier because it
107bf215546Sopenharmony_ci// doesn't operate at a fixed latency. Variable latency instructions are memory
108bf215546Sopenharmony_ci// operations, double precision operations, special function unit operations
109bf215546Sopenharmony_ci// and other low throughput instructions.
110bf215546Sopenharmony_cibool
111bf215546Sopenharmony_ciTargetGM107::isBarrierRequired(const Instruction *insn) const
112bf215546Sopenharmony_ci{
113bf215546Sopenharmony_ci   const OpClass cl = getOpClass(insn->op);
114bf215546Sopenharmony_ci
115bf215546Sopenharmony_ci   if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
116bf215546Sopenharmony_ci      return true;
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci   switch (cl) {
119bf215546Sopenharmony_ci   case OPCLASS_ATOMIC:
120bf215546Sopenharmony_ci   case OPCLASS_LOAD:
121bf215546Sopenharmony_ci   case OPCLASS_STORE:
122bf215546Sopenharmony_ci   case OPCLASS_SURFACE:
123bf215546Sopenharmony_ci   case OPCLASS_TEXTURE:
124bf215546Sopenharmony_ci      return true;
125bf215546Sopenharmony_ci   case OPCLASS_SFU:
126bf215546Sopenharmony_ci      switch (insn->op) {
127bf215546Sopenharmony_ci      case OP_COS:
128bf215546Sopenharmony_ci      case OP_EX2:
129bf215546Sopenharmony_ci      case OP_LG2:
130bf215546Sopenharmony_ci      case OP_LINTERP:
131bf215546Sopenharmony_ci      case OP_PINTERP:
132bf215546Sopenharmony_ci      case OP_RCP:
133bf215546Sopenharmony_ci      case OP_RSQ:
134bf215546Sopenharmony_ci      case OP_SIN:
135bf215546Sopenharmony_ci      case OP_SQRT:
136bf215546Sopenharmony_ci         return true;
137bf215546Sopenharmony_ci      default:
138bf215546Sopenharmony_ci         break;
139bf215546Sopenharmony_ci      }
140bf215546Sopenharmony_ci      break;
141bf215546Sopenharmony_ci   case OPCLASS_BITFIELD:
142bf215546Sopenharmony_ci      switch (insn->op) {
143bf215546Sopenharmony_ci      case OP_BFIND:
144bf215546Sopenharmony_ci      case OP_POPCNT:
145bf215546Sopenharmony_ci         return true;
146bf215546Sopenharmony_ci      default:
147bf215546Sopenharmony_ci         break;
148bf215546Sopenharmony_ci      }
149bf215546Sopenharmony_ci      break;
150bf215546Sopenharmony_ci   case OPCLASS_CONTROL:
151bf215546Sopenharmony_ci      switch (insn->op) {
152bf215546Sopenharmony_ci      case OP_EMIT:
153bf215546Sopenharmony_ci      case OP_RESTART:
154bf215546Sopenharmony_ci         return true;
155bf215546Sopenharmony_ci      default:
156bf215546Sopenharmony_ci         break;
157bf215546Sopenharmony_ci      }
158bf215546Sopenharmony_ci      break;
159bf215546Sopenharmony_ci   case OPCLASS_OTHER:
160bf215546Sopenharmony_ci      switch (insn->op) {
161bf215546Sopenharmony_ci      case OP_AFETCH:
162bf215546Sopenharmony_ci      case OP_PFETCH:
163bf215546Sopenharmony_ci      case OP_PIXLD:
164bf215546Sopenharmony_ci      case OP_SHFL:
165bf215546Sopenharmony_ci         return true;
166bf215546Sopenharmony_ci      case OP_RDSV:
167bf215546Sopenharmony_ci         return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
168bf215546Sopenharmony_ci      default:
169bf215546Sopenharmony_ci         break;
170bf215546Sopenharmony_ci      }
171bf215546Sopenharmony_ci      break;
172bf215546Sopenharmony_ci   case OPCLASS_ARITH:
173bf215546Sopenharmony_ci      if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
174bf215546Sopenharmony_ci          !isFloatType(insn->dType))
175bf215546Sopenharmony_ci         return true;
176bf215546Sopenharmony_ci      break;
177bf215546Sopenharmony_ci   case OPCLASS_CONVERT:
178bf215546Sopenharmony_ci      if (insn->def(0).getFile() != FILE_PREDICATE &&
179bf215546Sopenharmony_ci          insn->src(0).getFile() != FILE_PREDICATE)
180bf215546Sopenharmony_ci         return true;
181bf215546Sopenharmony_ci      break;
182bf215546Sopenharmony_ci   default:
183bf215546Sopenharmony_ci      break;
184bf215546Sopenharmony_ci   }
185bf215546Sopenharmony_ci   return false;
186bf215546Sopenharmony_ci}
187bf215546Sopenharmony_ci
188bf215546Sopenharmony_cibool
189bf215546Sopenharmony_ciTargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
190bf215546Sopenharmony_ci{
191bf215546Sopenharmony_ci   // TODO
192bf215546Sopenharmony_ci   return false;
193bf215546Sopenharmony_ci}
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci// Return the number of stall counts needed to complete a single instruction.
196bf215546Sopenharmony_ci// On Maxwell GPUs, the pipeline depth is 6, but some instructions require
197bf215546Sopenharmony_ci// different number of stall counts like memory operations.
198bf215546Sopenharmony_ciint
199bf215546Sopenharmony_ciTargetGM107::getLatency(const Instruction *insn) const
200bf215546Sopenharmony_ci{
201bf215546Sopenharmony_ci   // TODO: better values! This should be good enough for now though.
202bf215546Sopenharmony_ci   switch (insn->op) {
203bf215546Sopenharmony_ci   case OP_EMIT:
204bf215546Sopenharmony_ci   case OP_EXPORT:
205bf215546Sopenharmony_ci   case OP_PIXLD:
206bf215546Sopenharmony_ci   case OP_RESTART:
207bf215546Sopenharmony_ci   case OP_STORE:
208bf215546Sopenharmony_ci   case OP_SUSTB:
209bf215546Sopenharmony_ci   case OP_SUSTP:
210bf215546Sopenharmony_ci      return 1;
211bf215546Sopenharmony_ci   case OP_SHFL:
212bf215546Sopenharmony_ci      return 2;
213bf215546Sopenharmony_ci   case OP_ADD:
214bf215546Sopenharmony_ci   case OP_AND:
215bf215546Sopenharmony_ci   case OP_EXTBF:
216bf215546Sopenharmony_ci   case OP_FMA:
217bf215546Sopenharmony_ci   case OP_INSBF:
218bf215546Sopenharmony_ci   case OP_MAD:
219bf215546Sopenharmony_ci   case OP_MAX:
220bf215546Sopenharmony_ci   case OP_MIN:
221bf215546Sopenharmony_ci   case OP_MOV:
222bf215546Sopenharmony_ci   case OP_MUL:
223bf215546Sopenharmony_ci   case OP_NOT:
224bf215546Sopenharmony_ci   case OP_OR:
225bf215546Sopenharmony_ci   case OP_PREEX2:
226bf215546Sopenharmony_ci   case OP_PRESIN:
227bf215546Sopenharmony_ci   case OP_QUADOP:
228bf215546Sopenharmony_ci   case OP_SELP:
229bf215546Sopenharmony_ci   case OP_SET:
230bf215546Sopenharmony_ci   case OP_SET_AND:
231bf215546Sopenharmony_ci   case OP_SET_OR:
232bf215546Sopenharmony_ci   case OP_SET_XOR:
233bf215546Sopenharmony_ci   case OP_SHL:
234bf215546Sopenharmony_ci   case OP_SHLADD:
235bf215546Sopenharmony_ci   case OP_SHR:
236bf215546Sopenharmony_ci   case OP_SLCT:
237bf215546Sopenharmony_ci   case OP_SUB:
238bf215546Sopenharmony_ci   case OP_VOTE:
239bf215546Sopenharmony_ci   case OP_XOR:
240bf215546Sopenharmony_ci   case OP_XMAD:
241bf215546Sopenharmony_ci      if (insn->dType != TYPE_F64)
242bf215546Sopenharmony_ci         return 6;
243bf215546Sopenharmony_ci      break;
244bf215546Sopenharmony_ci   case OP_RDSV:
245bf215546Sopenharmony_ci      return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
246bf215546Sopenharmony_ci   case OP_ABS:
247bf215546Sopenharmony_ci   case OP_CEIL:
248bf215546Sopenharmony_ci   case OP_CVT:
249bf215546Sopenharmony_ci   case OP_FLOOR:
250bf215546Sopenharmony_ci   case OP_NEG:
251bf215546Sopenharmony_ci   case OP_SAT:
252bf215546Sopenharmony_ci   case OP_TRUNC:
253bf215546Sopenharmony_ci      if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
254bf215546Sopenharmony_ci                                 insn->src(0).getFile() == FILE_PREDICATE))
255bf215546Sopenharmony_ci         return 6;
256bf215546Sopenharmony_ci      break;
257bf215546Sopenharmony_ci   case OP_BFIND:
258bf215546Sopenharmony_ci   case OP_COS:
259bf215546Sopenharmony_ci   case OP_EX2:
260bf215546Sopenharmony_ci   case OP_LG2:
261bf215546Sopenharmony_ci   case OP_POPCNT:
262bf215546Sopenharmony_ci   case OP_QUADON:
263bf215546Sopenharmony_ci   case OP_QUADPOP:
264bf215546Sopenharmony_ci   case OP_RCP:
265bf215546Sopenharmony_ci   case OP_RSQ:
266bf215546Sopenharmony_ci   case OP_SIN:
267bf215546Sopenharmony_ci   case OP_SQRT:
268bf215546Sopenharmony_ci      return 13;
269bf215546Sopenharmony_ci   default:
270bf215546Sopenharmony_ci      break;
271bf215546Sopenharmony_ci   }
272bf215546Sopenharmony_ci   // Use the maximum number of stall counts for other instructions.
273bf215546Sopenharmony_ci   return 15;
274bf215546Sopenharmony_ci}
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci// Return the operand read latency which is the number of stall counts before
277bf215546Sopenharmony_ci// an instruction can read its sources. For memory operations like ATOM, LOAD
278bf215546Sopenharmony_ci// and STORE, the memory access has to be indirect.
279bf215546Sopenharmony_ciint
280bf215546Sopenharmony_ciTargetGM107::getReadLatency(const Instruction *insn) const
281bf215546Sopenharmony_ci{
282bf215546Sopenharmony_ci   switch (insn->op) {
283bf215546Sopenharmony_ci   case OP_ABS:
284bf215546Sopenharmony_ci   case OP_BFIND:
285bf215546Sopenharmony_ci   case OP_CEIL:
286bf215546Sopenharmony_ci   case OP_COS:
287bf215546Sopenharmony_ci   case OP_EX2:
288bf215546Sopenharmony_ci   case OP_FLOOR:
289bf215546Sopenharmony_ci   case OP_LG2:
290bf215546Sopenharmony_ci   case OP_NEG:
291bf215546Sopenharmony_ci   case OP_POPCNT:
292bf215546Sopenharmony_ci   case OP_RCP:
293bf215546Sopenharmony_ci   case OP_RSQ:
294bf215546Sopenharmony_ci   case OP_SAT:
295bf215546Sopenharmony_ci   case OP_SIN:
296bf215546Sopenharmony_ci   case OP_SQRT:
297bf215546Sopenharmony_ci   case OP_SULDB:
298bf215546Sopenharmony_ci   case OP_SULDP:
299bf215546Sopenharmony_ci   case OP_SUREDB:
300bf215546Sopenharmony_ci   case OP_SUREDP:
301bf215546Sopenharmony_ci   case OP_SUSTB:
302bf215546Sopenharmony_ci   case OP_SUSTP:
303bf215546Sopenharmony_ci   case OP_TRUNC:
304bf215546Sopenharmony_ci      return 4;
305bf215546Sopenharmony_ci   case OP_CVT:
306bf215546Sopenharmony_ci      if (insn->def(0).getFile() != FILE_PREDICATE &&
307bf215546Sopenharmony_ci          insn->src(0).getFile() != FILE_PREDICATE)
308bf215546Sopenharmony_ci         return 4;
309bf215546Sopenharmony_ci      break;
310bf215546Sopenharmony_ci   case OP_ATOM:
311bf215546Sopenharmony_ci   case OP_LOAD:
312bf215546Sopenharmony_ci   case OP_STORE:
313bf215546Sopenharmony_ci      if (insn->src(0).isIndirect(0)) {
314bf215546Sopenharmony_ci         switch (insn->src(0).getFile()) {
315bf215546Sopenharmony_ci         case FILE_MEMORY_SHARED:
316bf215546Sopenharmony_ci         case FILE_MEMORY_CONST:
317bf215546Sopenharmony_ci            return 2;
318bf215546Sopenharmony_ci         case FILE_MEMORY_GLOBAL:
319bf215546Sopenharmony_ci         case FILE_MEMORY_LOCAL:
320bf215546Sopenharmony_ci            return 4;
321bf215546Sopenharmony_ci         default:
322bf215546Sopenharmony_ci            break;
323bf215546Sopenharmony_ci         }
324bf215546Sopenharmony_ci      }
325bf215546Sopenharmony_ci      break;
326bf215546Sopenharmony_ci   case OP_EXPORT:
327bf215546Sopenharmony_ci   case OP_PFETCH:
328bf215546Sopenharmony_ci   case OP_SHFL:
329bf215546Sopenharmony_ci   case OP_VFETCH:
330bf215546Sopenharmony_ci      return 2;
331bf215546Sopenharmony_ci   default:
332bf215546Sopenharmony_ci      break;
333bf215546Sopenharmony_ci   }
334bf215546Sopenharmony_ci   return 0;
335bf215546Sopenharmony_ci}
336bf215546Sopenharmony_ci
337bf215546Sopenharmony_cibool
338bf215546Sopenharmony_ciTargetGM107::isCS2RSV(SVSemantic sv) const
339bf215546Sopenharmony_ci{
340bf215546Sopenharmony_ci   return sv == SV_CLOCK;
341bf215546Sopenharmony_ci}
342bf215546Sopenharmony_ci
343bf215546Sopenharmony_cibool
344bf215546Sopenharmony_ciTargetGM107::runLegalizePass(Program *prog, CGStage stage) const
345bf215546Sopenharmony_ci{
346bf215546Sopenharmony_ci   if (stage == CG_STAGE_PRE_SSA) {
347bf215546Sopenharmony_ci      GM107LoweringPass pass(prog);
348bf215546Sopenharmony_ci      return pass.run(prog, false, true);
349bf215546Sopenharmony_ci   } else
350bf215546Sopenharmony_ci   if (stage == CG_STAGE_POST_RA) {
351bf215546Sopenharmony_ci      NVC0LegalizePostRA pass(prog);
352bf215546Sopenharmony_ci      return pass.run(prog, false, true);
353bf215546Sopenharmony_ci   } else
354bf215546Sopenharmony_ci   if (stage == CG_STAGE_SSA) {
355bf215546Sopenharmony_ci      GM107LegalizeSSA pass;
356bf215546Sopenharmony_ci      return pass.run(prog, false, true);
357bf215546Sopenharmony_ci   }
358bf215546Sopenharmony_ci   return false;
359bf215546Sopenharmony_ci}
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ciCodeEmitter *
362bf215546Sopenharmony_ciTargetGM107::getCodeEmitter(Program::Type type)
363bf215546Sopenharmony_ci{
364bf215546Sopenharmony_ci   return createCodeEmitterGM107(type);
365bf215546Sopenharmony_ci}
366bf215546Sopenharmony_ci
367bf215546Sopenharmony_ci} // namespace nv50_ir
368