xref: /third_party/mesa3d/src/amd/llvm/ac_llvm_util.c (revision bf215546)
1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
26#include "ac_llvm_util.h"
27
28#include "ac_llvm_build.h"
29#include "c11/threads.h"
30#include "util/bitscan.h"
31#include "util/u_math.h"
32#include <llvm-c/Core.h>
33#include <llvm-c/Support.h>
34#include <llvm-c/Transforms/IPO.h>
35#include <llvm-c/Transforms/Scalar.h>
36#include <llvm-c/Transforms/Utils.h>
37
38#include <assert.h>
39#include <stdio.h>
40#include <string.h>
41
42static void ac_init_llvm_target(void)
43{
44   LLVMInitializeAMDGPUTargetInfo();
45   LLVMInitializeAMDGPUTarget();
46   LLVMInitializeAMDGPUTargetMC();
47   LLVMInitializeAMDGPUAsmPrinter();
48
49   /* For inline assembly. */
50   LLVMInitializeAMDGPUAsmParser();
51
52   /* For ACO disassembly. */
53   LLVMInitializeAMDGPUDisassembler();
54
55   const char *argv[] = {
56      /* error messages prefix */
57      "mesa",
58      "-amdgpu-atomic-optimizations=true",
59#if LLVM_VERSION_MAJOR == 11
60      /* This fixes variable indexing on LLVM 11. It also breaks atomic.cmpswap on LLVM >= 12. */
61      "-structurizecfg-skip-uniform-regions",
62#endif
63   };
64
65   ac_reset_llvm_all_options_occurences();
66   LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL);
67}
68
69PUBLIC void ac_init_shared_llvm_once(void)
70{
71   static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
72   call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
73}
74
75#if !LLVM_IS_SHARED
76static once_flag ac_init_static_llvm_target_once_flag = ONCE_FLAG_INIT;
77static void ac_init_static_llvm_once(void)
78{
79   call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target);
80}
81#endif
82
83void ac_init_llvm_once(void)
84{
85#if LLVM_IS_SHARED
86   ac_init_shared_llvm_once();
87#else
88   ac_init_static_llvm_once();
89#endif
90}
91
92LLVMTargetRef ac_get_llvm_target(const char *triple)
93{
94   LLVMTargetRef target = NULL;
95   char *err_message = NULL;
96
97   if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
98      fprintf(stderr, "Cannot find target for triple %s ", triple);
99      if (err_message) {
100         fprintf(stderr, "%s\n", err_message);
101      }
102      LLVMDisposeMessage(err_message);
103      return NULL;
104   }
105   return target;
106}
107
108const char *ac_get_llvm_processor_name(enum radeon_family family)
109{
110   switch (family) {
111   case CHIP_TAHITI:
112      return "tahiti";
113   case CHIP_PITCAIRN:
114      return "pitcairn";
115   case CHIP_VERDE:
116      return "verde";
117   case CHIP_OLAND:
118      return "oland";
119   case CHIP_HAINAN:
120      return "hainan";
121   case CHIP_BONAIRE:
122      return "bonaire";
123   case CHIP_KABINI:
124      return "kabini";
125   case CHIP_KAVERI:
126      return "kaveri";
127   case CHIP_HAWAII:
128      return "hawaii";
129   case CHIP_TONGA:
130      return "tonga";
131   case CHIP_ICELAND:
132      return "iceland";
133   case CHIP_CARRIZO:
134      return "carrizo";
135   case CHIP_FIJI:
136      return "fiji";
137   case CHIP_STONEY:
138      return "stoney";
139   case CHIP_POLARIS10:
140      return "polaris10";
141   case CHIP_POLARIS11:
142   case CHIP_POLARIS12:
143   case CHIP_VEGAM:
144      return "polaris11";
145   case CHIP_VEGA10:
146      return "gfx900";
147   case CHIP_RAVEN:
148      return "gfx902";
149   case CHIP_VEGA12:
150      return "gfx904";
151   case CHIP_VEGA20:
152      return "gfx906";
153   case CHIP_RAVEN2:
154   case CHIP_RENOIR:
155      return "gfx909";
156   case CHIP_ARCTURUS:
157      return "gfx908";
158   case CHIP_ALDEBARAN:
159      return "gfx90a";
160   case CHIP_NAVI10:
161      return "gfx1010";
162   case CHIP_NAVI12:
163      return "gfx1011";
164   case CHIP_NAVI14:
165      return "gfx1012";
166   case CHIP_NAVI21:
167      return "gfx1030";
168   case CHIP_NAVI22:
169      return LLVM_VERSION_MAJOR >= 12 ? "gfx1031" : "gfx1030";
170   case CHIP_NAVI23:
171      return LLVM_VERSION_MAJOR >= 12 ? "gfx1032" : "gfx1030";
172   case CHIP_VANGOGH:
173      return LLVM_VERSION_MAJOR >= 12 ? "gfx1033" : "gfx1030";
174   case CHIP_NAVI24:
175      return LLVM_VERSION_MAJOR >= 13 ? "gfx1034" : "gfx1030";
176   case CHIP_REMBRANDT:
177      return LLVM_VERSION_MAJOR >= 13 ? "gfx1035" : "gfx1030";
178   case CHIP_GFX1036: /* TODO: LLVM 15 doesn't support this yet */
179      return "gfx1030";
180   case CHIP_GFX1100:
181      return "gfx1100";
182   case CHIP_GFX1101:
183      return "gfx1101";
184   case CHIP_GFX1102:
185      return "gfx1102";
186   case CHIP_GFX1103:
187      return "gfx1103";
188   default:
189      return "";
190   }
191}
192
193static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
194                                                     enum ac_target_machine_options tm_options,
195                                                     LLVMCodeGenOptLevel level,
196                                                     const char **out_triple)
197{
198   assert(family >= CHIP_TAHITI);
199   const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
200   LLVMTargetRef target = ac_get_llvm_target(triple);
201   const char *name = ac_get_llvm_processor_name(family);
202
203   LLVMTargetMachineRef tm =
204      LLVMCreateTargetMachine(target, triple, name, "", level,
205                              LLVMRelocDefault, LLVMCodeModelDefault);
206
207   if (!ac_is_llvm_processor_supported(tm, name)) {
208      LLVMDisposeTargetMachine(tm);
209      fprintf(stderr, "amd: LLVM doesn't support %s, bailing out...\n", name);
210      return NULL;
211   }
212
213   if (out_triple)
214      *out_triple = triple;
215
216   return tm;
217}
218
219static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
220                                            bool check_ir)
221{
222   LLVMPassManagerRef passmgr = LLVMCreatePassManager();
223   if (!passmgr)
224      return NULL;
225
226   if (target_library_info)
227      LLVMAddTargetLibraryInfo(target_library_info, passmgr);
228
229   if (check_ir)
230      LLVMAddVerifierPass(passmgr);
231   LLVMAddAlwaysInlinerPass(passmgr);
232   /* Normally, the pass manager runs all passes on one function before
233    * moving onto another. Adding a barrier no-op pass forces the pass
234    * manager to run the inliner on all functions first, which makes sure
235    * that the following passes are only run on the remaining non-inline
236    * function, so it removes useless work done on dead inline functions.
237    */
238   ac_llvm_add_barrier_noop_pass(passmgr);
239   /* This pass should eliminate all the load and store instructions. */
240   LLVMAddPromoteMemoryToRegisterPass(passmgr);
241   LLVMAddScalarReplAggregatesPass(passmgr);
242   LLVMAddLICMPass(passmgr);
243   LLVMAddAggressiveDCEPass(passmgr);
244   LLVMAddCFGSimplificationPass(passmgr);
245   /* This is recommended by the instruction combining pass. */
246   LLVMAddEarlyCSEMemSSAPass(passmgr);
247   LLVMAddInstructionCombiningPass(passmgr);
248   return passmgr;
249}
250
251static const char *attr_to_str(enum ac_func_attr attr)
252{
253   switch (attr) {
254   case AC_FUNC_ATTR_ALWAYSINLINE:
255      return "alwaysinline";
256   case AC_FUNC_ATTR_INREG:
257      return "inreg";
258   case AC_FUNC_ATTR_NOALIAS:
259      return "noalias";
260   case AC_FUNC_ATTR_NOUNWIND:
261      return "nounwind";
262   case AC_FUNC_ATTR_READNONE:
263      return "readnone";
264   case AC_FUNC_ATTR_READONLY:
265      return "readonly";
266   case AC_FUNC_ATTR_WRITEONLY:
267      return "writeonly";
268   case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY:
269      return "inaccessiblememonly";
270   case AC_FUNC_ATTR_CONVERGENT:
271      return "convergent";
272   default:
273      fprintf(stderr, "Unhandled function attribute: %x\n", attr);
274      return 0;
275   }
276}
277
278void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx,
279                          enum ac_func_attr attr)
280{
281   const char *attr_name = attr_to_str(attr);
282   unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name));
283   LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0);
284
285   if (LLVMIsAFunction(function))
286      LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
287   else
288      LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
289}
290
291void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask)
292{
293   attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
294   attrib_mask &= ~AC_FUNC_ATTR_LEGACY;
295
296   while (attrib_mask) {
297      enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
298      ac_add_function_attr(ctx, function, -1, attr);
299   }
300}
301
302void ac_dump_module(LLVMModuleRef module)
303{
304   char *str = LLVMPrintModuleToString(module);
305   fprintf(stderr, "%s", str);
306   LLVMDisposeMessage(str);
307}
308
309void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value)
310{
311   char str[16];
312
313   snprintf(str, sizeof(str), "0x%x", value);
314   LLVMAddTargetDependentFunctionAttr(F, name, str);
315}
316
317void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size)
318{
319   if (!size)
320      return;
321
322   char str[32];
323   snprintf(str, sizeof(str), "%u,%u", size, size);
324   LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str);
325}
326
327void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx)
328{
329   char features[2048];
330
331   snprintf(features, sizeof(features), "+DumpCode%s%s",
332            /* GFX9 has broken VGPR indexing, so always promote alloca to scratch. */
333            ctx->gfx_level == GFX9 ? ",-promote-alloca" : "",
334            /* Wave32 is the default. */
335            ctx->gfx_level >= GFX10 && ctx->wave_size == 64 ?
336               ",+wavefrontsize64,-wavefrontsize32" : "");
337
338   LLVMAddTargetDependentFunctionAttr(F, "target-features", features);
339}
340
341bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family,
342                           enum ac_target_machine_options tm_options)
343{
344   const char *triple;
345   memset(compiler, 0, sizeof(*compiler));
346
347   compiler->tm = ac_create_target_machine(family, tm_options, LLVMCodeGenLevelDefault, &triple);
348   if (!compiler->tm)
349      return false;
350
351   if (tm_options & AC_TM_CREATE_LOW_OPT) {
352      compiler->low_opt_tm =
353         ac_create_target_machine(family, tm_options, LLVMCodeGenLevelLess, NULL);
354      if (!compiler->low_opt_tm)
355         goto fail;
356   }
357
358   compiler->target_library_info = ac_create_target_library_info(triple);
359   if (!compiler->target_library_info)
360      goto fail;
361
362   compiler->passmgr =
363      ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR);
364   if (!compiler->passmgr)
365      goto fail;
366
367   return true;
368fail:
369   ac_destroy_llvm_compiler(compiler);
370   return false;
371}
372
373void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
374{
375   ac_destroy_llvm_passes(compiler->passes);
376   ac_destroy_llvm_passes(compiler->low_opt_passes);
377
378   if (compiler->passmgr)
379      LLVMDisposePassManager(compiler->passmgr);
380   if (compiler->target_library_info)
381      ac_dispose_target_library_info(compiler->target_library_info);
382   if (compiler->low_opt_tm)
383      LLVMDisposeTargetMachine(compiler->low_opt_tm);
384   if (compiler->tm)
385      LLVMDisposeTargetMachine(compiler->tm);
386}
387