1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sub license, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18 * USE OR OTHER DEALINGS IN THE SOFTWARE. 19 * 20 * The above copyright notice and this permission notice (including the 21 * next paragraph) shall be included in all copies or substantial portions 22 * of the Software. 23 * 24 */ 25/* based on pieces from si_pipe.c and radeon_llvm_emit.c */ 26#include "ac_llvm_util.h" 27 28#include "ac_llvm_build.h" 29#include "c11/threads.h" 30#include "util/bitscan.h" 31#include "util/u_math.h" 32#include <llvm-c/Core.h> 33#include <llvm-c/Support.h> 34#include <llvm-c/Transforms/IPO.h> 35#include <llvm-c/Transforms/Scalar.h> 36#include <llvm-c/Transforms/Utils.h> 37 38#include <assert.h> 39#include <stdio.h> 40#include <string.h> 41 42static void ac_init_llvm_target(void) 43{ 44 LLVMInitializeAMDGPUTargetInfo(); 45 LLVMInitializeAMDGPUTarget(); 46 LLVMInitializeAMDGPUTargetMC(); 47 LLVMInitializeAMDGPUAsmPrinter(); 48 49 /* For inline assembly. */ 50 LLVMInitializeAMDGPUAsmParser(); 51 52 /* For ACO disassembly. */ 53 LLVMInitializeAMDGPUDisassembler(); 54 55 const char *argv[] = { 56 /* error messages prefix */ 57 "mesa", 58 "-amdgpu-atomic-optimizations=true", 59#if LLVM_VERSION_MAJOR == 11 60 /* This fixes variable indexing on LLVM 11. It also breaks atomic.cmpswap on LLVM >= 12. */ 61 "-structurizecfg-skip-uniform-regions", 62#endif 63 }; 64 65 ac_reset_llvm_all_options_occurences(); 66 LLVMParseCommandLineOptions(ARRAY_SIZE(argv), argv, NULL); 67} 68 69PUBLIC void ac_init_shared_llvm_once(void) 70{ 71 static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT; 72 call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target); 73} 74 75#if !LLVM_IS_SHARED 76static once_flag ac_init_static_llvm_target_once_flag = ONCE_FLAG_INIT; 77static void ac_init_static_llvm_once(void) 78{ 79 call_once(&ac_init_static_llvm_target_once_flag, ac_init_llvm_target); 80} 81#endif 82 83void ac_init_llvm_once(void) 84{ 85#if LLVM_IS_SHARED 86 ac_init_shared_llvm_once(); 87#else 88 ac_init_static_llvm_once(); 89#endif 90} 91 92LLVMTargetRef ac_get_llvm_target(const char *triple) 93{ 94 LLVMTargetRef target = NULL; 95 char *err_message = NULL; 96 97 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { 98 fprintf(stderr, "Cannot find target for triple %s ", triple); 99 if (err_message) { 100 fprintf(stderr, "%s\n", err_message); 101 } 102 LLVMDisposeMessage(err_message); 103 return NULL; 104 } 105 return target; 106} 107 108const char *ac_get_llvm_processor_name(enum radeon_family family) 109{ 110 switch (family) { 111 case CHIP_TAHITI: 112 return "tahiti"; 113 case CHIP_PITCAIRN: 114 return "pitcairn"; 115 case CHIP_VERDE: 116 return "verde"; 117 case CHIP_OLAND: 118 return "oland"; 119 case CHIP_HAINAN: 120 return "hainan"; 121 case CHIP_BONAIRE: 122 return "bonaire"; 123 case CHIP_KABINI: 124 return "kabini"; 125 case CHIP_KAVERI: 126 return "kaveri"; 127 case CHIP_HAWAII: 128 return "hawaii"; 129 case CHIP_TONGA: 130 return "tonga"; 131 case CHIP_ICELAND: 132 return "iceland"; 133 case CHIP_CARRIZO: 134 return "carrizo"; 135 case CHIP_FIJI: 136 return "fiji"; 137 case CHIP_STONEY: 138 return "stoney"; 139 case CHIP_POLARIS10: 140 return "polaris10"; 141 case CHIP_POLARIS11: 142 case CHIP_POLARIS12: 143 case CHIP_VEGAM: 144 return "polaris11"; 145 case CHIP_VEGA10: 146 return "gfx900"; 147 case CHIP_RAVEN: 148 return "gfx902"; 149 case CHIP_VEGA12: 150 return "gfx904"; 151 case CHIP_VEGA20: 152 return "gfx906"; 153 case CHIP_RAVEN2: 154 case CHIP_RENOIR: 155 return "gfx909"; 156 case CHIP_ARCTURUS: 157 return "gfx908"; 158 case CHIP_ALDEBARAN: 159 return "gfx90a"; 160 case CHIP_NAVI10: 161 return "gfx1010"; 162 case CHIP_NAVI12: 163 return "gfx1011"; 164 case CHIP_NAVI14: 165 return "gfx1012"; 166 case CHIP_NAVI21: 167 return "gfx1030"; 168 case CHIP_NAVI22: 169 return LLVM_VERSION_MAJOR >= 12 ? "gfx1031" : "gfx1030"; 170 case CHIP_NAVI23: 171 return LLVM_VERSION_MAJOR >= 12 ? "gfx1032" : "gfx1030"; 172 case CHIP_VANGOGH: 173 return LLVM_VERSION_MAJOR >= 12 ? "gfx1033" : "gfx1030"; 174 case CHIP_NAVI24: 175 return LLVM_VERSION_MAJOR >= 13 ? "gfx1034" : "gfx1030"; 176 case CHIP_REMBRANDT: 177 return LLVM_VERSION_MAJOR >= 13 ? "gfx1035" : "gfx1030"; 178 case CHIP_GFX1036: /* TODO: LLVM 15 doesn't support this yet */ 179 return "gfx1030"; 180 case CHIP_GFX1100: 181 return "gfx1100"; 182 case CHIP_GFX1101: 183 return "gfx1101"; 184 case CHIP_GFX1102: 185 return "gfx1102"; 186 case CHIP_GFX1103: 187 return "gfx1103"; 188 default: 189 return ""; 190 } 191} 192 193static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, 194 enum ac_target_machine_options tm_options, 195 LLVMCodeGenOptLevel level, 196 const char **out_triple) 197{ 198 assert(family >= CHIP_TAHITI); 199 const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--"; 200 LLVMTargetRef target = ac_get_llvm_target(triple); 201 const char *name = ac_get_llvm_processor_name(family); 202 203 LLVMTargetMachineRef tm = 204 LLVMCreateTargetMachine(target, triple, name, "", level, 205 LLVMRelocDefault, LLVMCodeModelDefault); 206 207 if (!ac_is_llvm_processor_supported(tm, name)) { 208 LLVMDisposeTargetMachine(tm); 209 fprintf(stderr, "amd: LLVM doesn't support %s, bailing out...\n", name); 210 return NULL; 211 } 212 213 if (out_triple) 214 *out_triple = triple; 215 216 return tm; 217} 218 219static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, 220 bool check_ir) 221{ 222 LLVMPassManagerRef passmgr = LLVMCreatePassManager(); 223 if (!passmgr) 224 return NULL; 225 226 if (target_library_info) 227 LLVMAddTargetLibraryInfo(target_library_info, passmgr); 228 229 if (check_ir) 230 LLVMAddVerifierPass(passmgr); 231 LLVMAddAlwaysInlinerPass(passmgr); 232 /* Normally, the pass manager runs all passes on one function before 233 * moving onto another. Adding a barrier no-op pass forces the pass 234 * manager to run the inliner on all functions first, which makes sure 235 * that the following passes are only run on the remaining non-inline 236 * function, so it removes useless work done on dead inline functions. 237 */ 238 ac_llvm_add_barrier_noop_pass(passmgr); 239 /* This pass should eliminate all the load and store instructions. */ 240 LLVMAddPromoteMemoryToRegisterPass(passmgr); 241 LLVMAddScalarReplAggregatesPass(passmgr); 242 LLVMAddLICMPass(passmgr); 243 LLVMAddAggressiveDCEPass(passmgr); 244 LLVMAddCFGSimplificationPass(passmgr); 245 /* This is recommended by the instruction combining pass. */ 246 LLVMAddEarlyCSEMemSSAPass(passmgr); 247 LLVMAddInstructionCombiningPass(passmgr); 248 return passmgr; 249} 250 251static const char *attr_to_str(enum ac_func_attr attr) 252{ 253 switch (attr) { 254 case AC_FUNC_ATTR_ALWAYSINLINE: 255 return "alwaysinline"; 256 case AC_FUNC_ATTR_INREG: 257 return "inreg"; 258 case AC_FUNC_ATTR_NOALIAS: 259 return "noalias"; 260 case AC_FUNC_ATTR_NOUNWIND: 261 return "nounwind"; 262 case AC_FUNC_ATTR_READNONE: 263 return "readnone"; 264 case AC_FUNC_ATTR_READONLY: 265 return "readonly"; 266 case AC_FUNC_ATTR_WRITEONLY: 267 return "writeonly"; 268 case AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY: 269 return "inaccessiblememonly"; 270 case AC_FUNC_ATTR_CONVERGENT: 271 return "convergent"; 272 default: 273 fprintf(stderr, "Unhandled function attribute: %x\n", attr); 274 return 0; 275 } 276} 277 278void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, 279 enum ac_func_attr attr) 280{ 281 const char *attr_name = attr_to_str(attr); 282 unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); 283 LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(ctx, kind_id, 0); 284 285 if (LLVMIsAFunction(function)) 286 LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); 287 else 288 LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); 289} 290 291void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, unsigned attrib_mask) 292{ 293 attrib_mask |= AC_FUNC_ATTR_NOUNWIND; 294 attrib_mask &= ~AC_FUNC_ATTR_LEGACY; 295 296 while (attrib_mask) { 297 enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask); 298 ac_add_function_attr(ctx, function, -1, attr); 299 } 300} 301 302void ac_dump_module(LLVMModuleRef module) 303{ 304 char *str = LLVMPrintModuleToString(module); 305 fprintf(stderr, "%s", str); 306 LLVMDisposeMessage(str); 307} 308 309void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, const char *name, unsigned value) 310{ 311 char str[16]; 312 313 snprintf(str, sizeof(str), "0x%x", value); 314 LLVMAddTargetDependentFunctionAttr(F, name, str); 315} 316 317void ac_llvm_set_workgroup_size(LLVMValueRef F, unsigned size) 318{ 319 if (!size) 320 return; 321 322 char str[32]; 323 snprintf(str, sizeof(str), "%u,%u", size, size); 324 LLVMAddTargetDependentFunctionAttr(F, "amdgpu-flat-work-group-size", str); 325} 326 327void ac_llvm_set_target_features(LLVMValueRef F, struct ac_llvm_context *ctx) 328{ 329 char features[2048]; 330 331 snprintf(features, sizeof(features), "+DumpCode%s%s", 332 /* GFX9 has broken VGPR indexing, so always promote alloca to scratch. */ 333 ctx->gfx_level == GFX9 ? ",-promote-alloca" : "", 334 /* Wave32 is the default. */ 335 ctx->gfx_level >= GFX10 && ctx->wave_size == 64 ? 336 ",+wavefrontsize64,-wavefrontsize32" : ""); 337 338 LLVMAddTargetDependentFunctionAttr(F, "target-features", features); 339} 340 341bool ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, enum radeon_family family, 342 enum ac_target_machine_options tm_options) 343{ 344 const char *triple; 345 memset(compiler, 0, sizeof(*compiler)); 346 347 compiler->tm = ac_create_target_machine(family, tm_options, LLVMCodeGenLevelDefault, &triple); 348 if (!compiler->tm) 349 return false; 350 351 if (tm_options & AC_TM_CREATE_LOW_OPT) { 352 compiler->low_opt_tm = 353 ac_create_target_machine(family, tm_options, LLVMCodeGenLevelLess, NULL); 354 if (!compiler->low_opt_tm) 355 goto fail; 356 } 357 358 compiler->target_library_info = ac_create_target_library_info(triple); 359 if (!compiler->target_library_info) 360 goto fail; 361 362 compiler->passmgr = 363 ac_create_passmgr(compiler->target_library_info, tm_options & AC_TM_CHECK_IR); 364 if (!compiler->passmgr) 365 goto fail; 366 367 return true; 368fail: 369 ac_destroy_llvm_compiler(compiler); 370 return false; 371} 372 373void ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) 374{ 375 ac_destroy_llvm_passes(compiler->passes); 376 ac_destroy_llvm_passes(compiler->low_opt_passes); 377 378 if (compiler->passmgr) 379 LLVMDisposePassManager(compiler->passmgr); 380 if (compiler->target_library_info) 381 ac_dispose_target_library_info(compiler->target_library_info); 382 if (compiler->low_opt_tm) 383 LLVMDisposeTargetMachine(compiler->low_opt_tm); 384 if (compiler->tm) 385 LLVMDisposeTargetMachine(compiler->tm); 386} 387