1/*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include "aco_ir.h"
26
27#ifdef LLVM_AVAILABLE
28#if defined(_MSC_VER) && defined(restrict)
29#undef restrict
30#endif
31#include "llvm/ac_llvm_util.h"
32
33#include "llvm-c/Disassembler.h"
34#include <llvm/ADT/StringRef.h>
35#include <llvm/MC/MCDisassembler/MCDisassembler.h>
36#endif
37
38#include <array>
39#include <iomanip>
40#include <vector>
41
42namespace aco {
43namespace {
44
45std::vector<bool>
46get_referenced_blocks(Program* program)
47{
48   std::vector<bool> referenced_blocks(program->blocks.size());
49   referenced_blocks[0] = true;
50   for (Block& block : program->blocks) {
51      for (unsigned succ : block.linear_succs)
52         referenced_blocks[succ] = true;
53   }
54   return referenced_blocks;
55}
56
57void
58print_block_markers(FILE* output, Program* program, const std::vector<bool>& referenced_blocks,
59                    unsigned* next_block, unsigned pos)
60{
61   while (*next_block < program->blocks.size() && pos == program->blocks[*next_block].offset) {
62      if (referenced_blocks[*next_block])
63         fprintf(output, "BB%u:\n", *next_block);
64      (*next_block)++;
65   }
66}
67
/**
 * Prints one disassembled instruction: the text left-aligned in a 60
 * character column, followed by " ;" and the hex value of each of the
 * `size` dwords of `binary` starting at index `pos`.
 */
void
print_instr(FILE* output, const std::vector<uint32_t>& binary, char* instr, unsigned size,
            unsigned pos)
{
   fprintf(output, "%-60s ;", instr);

   const unsigned end = pos + size;
   for (unsigned dword = pos; dword != end; ++dword)
      fprintf(output, " %.8x", binary[dword]);

   fputs("\n", output);
}
78
79void
80print_constant_data(FILE* output, Program* program)
81{
82   if (program->constant_data.empty())
83      return;
84
85   fputs("\n/* constant data */\n", output);
86   for (unsigned i = 0; i < program->constant_data.size(); i += 32) {
87      fprintf(output, "[%.6u]", i);
88      unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32);
89      for (unsigned j = 0; j < line_size; j += 4) {
90         unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4);
91         uint32_t v = 0;
92         memcpy(&v, &program->constant_data[i + j], size);
93         fprintf(output, " %.8x", v);
94      }
95      fputc('\n', output);
96   }
97}
98
99/**
100 * Determines the GPU type to use for CLRXdisasm
101 */
102const char*
103to_clrx_device_name(amd_gfx_level gfx_level, radeon_family family)
104{
105   switch (gfx_level) {
106   case GFX6:
107      switch (family) {
108      case CHIP_TAHITI: return "tahiti";
109      case CHIP_PITCAIRN: return "pitcairn";
110      case CHIP_VERDE: return "capeverde";
111      case CHIP_OLAND: return "oland";
112      case CHIP_HAINAN: return "hainan";
113      default: return nullptr;
114      }
115   case GFX7:
116      switch (family) {
117      case CHIP_BONAIRE: return "bonaire";
118      case CHIP_KAVERI: return "gfx700";
119      case CHIP_HAWAII: return "hawaii";
120      default: return nullptr;
121      }
122   case GFX8:
123      switch (family) {
124      case CHIP_TONGA: return "tonga";
125      case CHIP_ICELAND: return "iceland";
126      case CHIP_CARRIZO: return "carrizo";
127      case CHIP_FIJI: return "fiji";
128      case CHIP_STONEY: return "stoney";
129      case CHIP_POLARIS10: return "polaris10";
130      case CHIP_POLARIS11: return "polaris11";
131      case CHIP_POLARIS12: return "polaris12";
132      case CHIP_VEGAM: return "polaris11";
133      default: return nullptr;
134      }
135   case GFX9:
136      switch (family) {
137      case CHIP_VEGA10: return "vega10";
138      case CHIP_VEGA12: return "vega12";
139      case CHIP_VEGA20: return "vega20";
140      case CHIP_RAVEN: return "raven";
141      default: return nullptr;
142      }
143   case GFX10:
144      switch (family) {
145      case CHIP_NAVI10: return "gfx1010";
146      case CHIP_NAVI12: return "gfx1011";
147      default: return nullptr;
148      }
149   case GFX10_3:
150   case GFX11: return nullptr;
151   default: unreachable("Invalid chip class!"); return nullptr;
152   }
153}
154
155bool
156get_branch_target(char** output, Program* program, const std::vector<bool>& referenced_blocks,
157                  char** line_start)
158{
159   unsigned pos;
160   if (sscanf(*line_start, ".L%d_0", &pos) != 1)
161      return false;
162   pos /= 4;
163   *line_start = strchr(*line_start, '_') + 2;
164
165   for (Block& block : program->blocks) {
166      if (referenced_blocks[block.index] && block.offset == pos) {
167         *output += sprintf(*output, "BB%u", block.index);
168         return true;
169      }
170   }
171   return false;
172}
173
174bool
175print_asm_clrx(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
176{
177#ifdef _WIN32
178   return true;
179#else
180   char path[] = "/tmp/fileXXXXXX";
181   char line[2048], command[128];
182   FILE* p;
183   int fd;
184
185   const char* gpu_type = to_clrx_device_name(program->gfx_level, program->family);
186
187   /* Dump the binary into a temporary file. */
188   fd = mkstemp(path);
189   if (fd < 0)
190      return true;
191
192   for (unsigned i = 0; i < exec_size; i++) {
193      if (write(fd, &binary[i], 4) == -1)
194         goto fail;
195   }
196
197   sprintf(command, "clrxdisasm --gpuType=%s -r %s", gpu_type, path);
198
199   p = popen(command, "r");
200   if (p) {
201      if (!fgets(line, sizeof(line), p)) {
202         fprintf(output, "clrxdisasm not found\n");
203         pclose(p);
204         goto fail;
205      }
206
207      std::vector<bool> referenced_blocks = get_referenced_blocks(program);
208      unsigned next_block = 0;
209
210      char prev_instr[2048];
211      unsigned prev_pos = 0;
212      do {
213         char* line_start = line;
214         if (strncmp(line_start, "/*", 2))
215            continue;
216
217         unsigned pos;
218         if (sscanf(line_start, "/*%x*/", &pos) != 1)
219            continue;
220         pos /= 4u; /* get the dword position */
221
222         while (strncmp(line_start, "*/", 2))
223            line_start++;
224         line_start += 2;
225
226         while (line_start[0] == ' ')
227            line_start++;
228         *strchr(line_start, '\n') = 0;
229
230         if (*line_start == 0)
231            continue; /* not an instruction, only a comment */
232
233         if (pos != prev_pos) {
234            /* Print the previous instruction, now that we know the encoding size. */
235            print_instr(output, binary, prev_instr, pos - prev_pos, prev_pos);
236            prev_pos = pos;
237         }
238
239         print_block_markers(output, program, referenced_blocks, &next_block, pos);
240
241         char* dest = prev_instr;
242         *(dest++) = '\t';
243         while (*line_start) {
244            if (!strncmp(line_start, ".L", 2) &&
245                get_branch_target(&dest, program, referenced_blocks, &line_start))
246               continue;
247            *(dest++) = *(line_start++);
248         }
249         *(dest++) = 0;
250      } while (fgets(line, sizeof(line), p));
251
252      if (prev_pos != exec_size)
253         print_instr(output, binary, prev_instr, exec_size - prev_pos, prev_pos);
254
255      pclose(p);
256
257      print_constant_data(output, program);
258   }
259
260   return false;
261
262fail:
263   close(fd);
264   unlink(path);
265   return true;
266#endif
267}
268
269#ifdef LLVM_AVAILABLE
270std::pair<bool, size_t>
271disasm_instr(amd_gfx_level gfx_level, LLVMDisasmContextRef disasm, uint32_t* binary,
272             unsigned exec_size, size_t pos, char* outline, unsigned outline_size)
273{
274   size_t l =
275      LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos], (exec_size - pos) * sizeof(uint32_t),
276                            pos * 4, outline, outline_size);
277
278   if (gfx_level >= GFX10 && l == 8 && ((binary[pos] & 0xffff0000) == 0xd7610000) &&
279       ((binary[pos + 1] & 0x1ff) == 0xff)) {
280      /* v_writelane with literal uses 3 dwords but llvm consumes only 2 */
281      l += 4;
282   }
283
284   bool invalid = false;
285   size_t size;
286   if (!l &&
287       ((gfx_level >= GFX9 &&
288         (binary[pos] & 0xffff8000) == 0xd1348000) || /* v_add_u32_e64 + clamp */
289        (gfx_level >= GFX10 &&
290         (binary[pos] & 0xffff8000) == 0xd7038000) || /* v_add_u16_e64 + clamp */
291        (gfx_level <= GFX9 &&
292         (binary[pos] & 0xffff8000) == 0xd1268000) || /* v_add_u16_e64 + clamp */
293        (gfx_level >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */
294        (gfx_level == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) {
295      strcpy(outline, "\tinteger addition + clamp");
296      bool has_literal = gfx_level >= GFX10 && (((binary[pos + 1] & 0x1ff) == 0xff) ||
297                                                (((binary[pos + 1] >> 9) & 0x1ff) == 0xff));
298      size = 2 + has_literal;
299   } else if (gfx_level >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) {
300      strcpy(outline, "\tv_cndmask_b32 + sdwa");
301      size = 2;
302   } else if (!l) {
303      strcpy(outline, "(invalid instruction)");
304      size = 1;
305      invalid = true;
306   } else {
307      assert(l % 4 == 0);
308      size = l / 4;
309   }
310
311#if LLVM_VERSION_MAJOR <= 14
312   /* See: https://github.com/GPUOpen-Tools/radeon_gpu_profiler/issues/65 and
313    * https://github.com/llvm/llvm-project/issues/38652
314    */
315   if (invalid) {
316      /* do nothing */
317   } else if (gfx_level == GFX9 && (binary[pos] & 0xfc024000) == 0xc0024000) {
318      /* SMEM with IMM=1 and SOE=1: LLVM ignores SOFFSET */
319      size_t len = strlen(outline);
320
321      char imm[16] = {0};
322      while (outline[--len] != ' ') ;
323      strncpy(imm, outline + len + 1, sizeof(imm) - 1);
324
325      snprintf(outline + len, outline_size - len, " s%u offset:%s", binary[pos + 1] >> 25, imm);
326   } else if (gfx_level >= GFX10 && (binary[pos] & 0xfc000000) == 0xf4000000 &&
327              (binary[pos + 1] & 0xfe000000) != 0xfa000000) {
328      /* SMEM non-NULL SOFFSET: LLVM ignores OFFSET */
329      uint32_t offset = binary[pos + 1] & 0x1fffff;
330      if (offset) {
331         size_t len = strlen(outline);
332         snprintf(outline + len, outline_size - len, " offset:0x%x", offset);
333      }
334   }
335#endif
336
337   return std::make_pair(invalid, size);
338}
339
340bool
341print_asm_llvm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
342{
343   std::vector<bool> referenced_blocks = get_referenced_blocks(program);
344
345   std::vector<llvm::SymbolInfoTy> symbols;
346   std::vector<std::array<char, 16>> block_names;
347   block_names.reserve(program->blocks.size());
348   for (Block& block : program->blocks) {
349      if (!referenced_blocks[block.index])
350         continue;
351      std::array<char, 16> name;
352      sprintf(name.data(), "BB%u", block.index);
353      block_names.push_back(name);
354      symbols.emplace_back(block.offset * 4,
355                           llvm::StringRef(block_names[block_names.size() - 1].data()), 0);
356   }
357
358   const char* features = "";
359   if (program->gfx_level >= GFX10 && program->wave_size == 64) {
360      features = "+wavefrontsize64";
361   }
362
363   LLVMDisasmContextRef disasm =
364      LLVMCreateDisasmCPUFeatures("amdgcn-mesa-mesa3d", ac_get_llvm_processor_name(program->family),
365                                  features, &symbols, 0, NULL, NULL);
366
367   size_t pos = 0;
368   bool invalid = false;
369   unsigned next_block = 0;
370
371   unsigned prev_size = 0;
372   unsigned prev_pos = 0;
373   unsigned repeat_count = 0;
374   while (pos < exec_size) {
375      bool new_block =
376         next_block < program->blocks.size() && pos == program->blocks[next_block].offset;
377      if (pos + prev_size <= exec_size && prev_pos != pos && !new_block &&
378          memcmp(&binary[prev_pos], &binary[pos], prev_size * 4) == 0) {
379         repeat_count++;
380         pos += prev_size;
381         continue;
382      } else {
383         if (repeat_count)
384            fprintf(output, "\t(then repeated %u times)\n", repeat_count);
385         repeat_count = 0;
386      }
387
388      print_block_markers(output, program, referenced_blocks, &next_block, pos);
389
390      char outline[1024];
391      std::pair<bool, size_t> res = disasm_instr(program->gfx_level, disasm, binary.data(),
392                                                 exec_size, pos, outline, sizeof(outline));
393      invalid |= res.first;
394
395      print_instr(output, binary, outline, res.second, pos);
396
397      prev_size = res.second;
398      prev_pos = pos;
399      pos += res.second;
400   }
401   assert(next_block == program->blocks.size());
402
403   LLVMDisasmDispose(disasm);
404
405   print_constant_data(output, program);
406
407   return invalid;
408}
409#endif /* LLVM_AVAILABLE */
410
411} /* end namespace */
412
413bool
414check_print_asm_support(Program* program)
415{
416#ifdef LLVM_AVAILABLE
417   if (program->gfx_level >= GFX8) {
418      /* LLVM disassembler only supports GFX8+ */
419      const char* name = ac_get_llvm_processor_name(program->family);
420      const char* triple = "amdgcn--";
421      LLVMTargetRef target = ac_get_llvm_target(triple);
422
423      LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
424         target, triple, name, "", LLVMCodeGenLevelDefault, LLVMRelocDefault, LLVMCodeModelDefault);
425
426      bool supported = ac_is_llvm_processor_supported(tm, name);
427      LLVMDisposeTargetMachine(tm);
428
429      if (supported)
430         return true;
431   }
432#endif
433
434#ifndef _WIN32
435   /* Check if CLRX disassembler binary is available and can disassemble the program */
436   return to_clrx_device_name(program->gfx_level, program->family) &&
437          system("clrxdisasm --version") == 0;
438#else
439   return false;
440#endif
441}
442
443/* Returns true on failure */
444bool
445print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)
446{
447#ifdef LLVM_AVAILABLE
448   if (program->gfx_level >= GFX8) {
449      return print_asm_llvm(program, binary, exec_size, output);
450   }
451#endif
452
453   return print_asm_clrx(program, binary, exec_size, output);
454}
455
456} // namespace aco
457