1/* 2 * Copyright 2014 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the 6 * "Software"), to deal in the Software without restriction, including 7 * without limitation the rights to use, copy, modify, merge, publish, 8 * distribute, sub license, and/or sell copies of the Software, and to 9 * permit persons to whom the Software is furnished to do so, subject to 10 * the following conditions: 11 * 12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 18 * USE OR OTHER DEALINGS IN THE SOFTWARE. 19 * 20 * The above copyright notice and this permission notice (including the 21 * next paragraph) shall be included in all copies or substantial portions 22 * of the Software. 23 * 24 */ 25 26#include <llvm-c/Core.h> 27#include <llvm/Analysis/TargetLibraryInfo.h> 28#include <llvm/IR/IRBuilder.h> 29#include <llvm/IR/LegacyPassManager.h> 30#include <llvm/Target/TargetMachine.h> 31#include <llvm/MC/MCSubtargetInfo.h> 32#include <llvm/Support/CommandLine.h> 33#include <llvm/Transforms/IPO.h> 34 35#include <cstring> 36 37/* DO NOT REORDER THE HEADERS 38 * The LLVM headers need to all be included before any Mesa header, 39 * as they use the `restrict` keyword in ways that are incompatible 40 * with our #define in include/c99_compat.h 41 */ 42 43#include "ac_binary.h" 44#include "ac_llvm_util.h" 45#include "ac_llvm_build.h" 46#include "util/macros.h" 47 48bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor) 49{ 50 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm); 51 return TM->getMCSubtargetInfo()->isCPUStringValid(processor); 52} 53 54void ac_reset_llvm_all_options_occurences() 55{ 56 llvm::cl::ResetAllOptionOccurrences(); 57} 58 59void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes) 60{ 61 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val); 62 A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes)); 63} 64 65void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes) 66{ 67 llvm::Argument *A = llvm::unwrap<llvm::Argument>(val); 68 A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes))); 69} 70 71bool ac_is_sgpr_param(LLVMValueRef arg) 72{ 73 llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg); 74 llvm::AttributeList AS = A->getParent()->getAttributes(); 75 unsigned ArgNo = A->getArgNo(); 76 return AS.hasParamAttr(ArgNo, llvm::Attribute::InReg); 77} 78 79LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx) 80{ 81 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm); 82 LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx); 83 84 llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple()); 85 llvm::unwrap(module)->setDataLayout(TM->createDataLayout()); 86 return module; 87} 88 89LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode) 90{ 91 LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx); 92 93 llvm::FastMathFlags flags; 94 95 switch (float_mode) { 96 case AC_FLOAT_MODE_DEFAULT: 97 case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO: 98 break; 99 100 case AC_FLOAT_MODE_DEFAULT_OPENGL: 101 /* Allow optimizations to treat the sign of a zero argument or 102 * result as insignificant. 103 */ 104 flags.setNoSignedZeros(); /* nsz */ 105 106 /* Allow optimizations to use the reciprocal of an argument 107 * rather than perform division. 108 */ 109 flags.setAllowReciprocal(); /* arcp */ 110 111 llvm::unwrap(builder)->setFastMathFlags(flags); 112 break; 113 } 114 115 return builder; 116} 117 118void ac_enable_signed_zeros(struct ac_llvm_context *ctx) 119{ 120 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) { 121 auto *b = llvm::unwrap(ctx->builder); 122 llvm::FastMathFlags flags = b->getFastMathFlags(); 123 124 /* This disables the optimization of (x + 0), which is used 125 * to convert negative zero to positive zero. 126 */ 127 flags.setNoSignedZeros(false); 128 b->setFastMathFlags(flags); 129 } 130} 131 132void ac_disable_signed_zeros(struct ac_llvm_context *ctx) 133{ 134 if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) { 135 auto *b = llvm::unwrap(ctx->builder); 136 llvm::FastMathFlags flags = b->getFastMathFlags(); 137 138 flags.setNoSignedZeros(); 139 b->setFastMathFlags(flags); 140 } 141} 142 143LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple) 144{ 145 return reinterpret_cast<LLVMTargetLibraryInfoRef>( 146 new llvm::TargetLibraryInfoImpl(llvm::Triple(triple))); 147} 148 149void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info) 150{ 151 delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info); 152} 153 154/* Implementation of raw_pwrite_stream that works on malloc()ed memory for 155 * better compatibility with C code. */ 156struct raw_memory_ostream : public llvm::raw_pwrite_stream { 157 char *buffer; 158 size_t written; 159 size_t bufsize; 160 161 raw_memory_ostream() 162 { 163 buffer = NULL; 164 written = 0; 165 bufsize = 0; 166 SetUnbuffered(); 167 } 168 169 ~raw_memory_ostream() 170 { 171 free(buffer); 172 } 173 174 void clear() 175 { 176 written = 0; 177 } 178 179 void take(char *&out_buffer, size_t &out_size) 180 { 181 out_buffer = buffer; 182 out_size = written; 183 buffer = NULL; 184 written = 0; 185 bufsize = 0; 186 } 187 188 void flush() = delete; 189 190 void write_impl(const char *ptr, size_t size) override 191 { 192 if (unlikely(written + size < written)) 193 abort(); 194 if (written + size > bufsize) { 195 bufsize = MAX3(1024, written + size, bufsize / 3 * 4); 196 buffer = (char *)realloc(buffer, bufsize); 197 if (!buffer) { 198 fprintf(stderr, "amd: out of memory allocating ELF buffer\n"); 199 abort(); 200 } 201 } 202 memcpy(buffer + written, ptr, size); 203 written += size; 204 } 205 206 void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override 207 { 208 assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written); 209 memcpy(buffer + offset, ptr, size); 210 } 211 212 uint64_t current_pos() const override 213 { 214 return written; 215 } 216}; 217 218/* The LLVM compiler is represented as a pass manager containing passes for 219 * optimizations, instruction selection, and code generation. 220 */ 221struct ac_compiler_passes { 222 raw_memory_ostream ostream; /* ELF shader binary stream */ 223 llvm::legacy::PassManager passmgr; /* list of passes */ 224}; 225 226struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm) 227{ 228 struct ac_compiler_passes *p = new ac_compiler_passes(); 229 if (!p) 230 return NULL; 231 232 llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm); 233 234 if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr, 235 llvm::CGFT_ObjectFile)) { 236 fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n"); 237 delete p; 238 return NULL; 239 } 240 return p; 241} 242 243void ac_destroy_llvm_passes(struct ac_compiler_passes *p) 244{ 245 delete p; 246} 247 248/* This returns false on failure. */ 249bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module, 250 char **pelf_buffer, size_t *pelf_size) 251{ 252 p->passmgr.run(*llvm::unwrap(module)); 253 p->ostream.take(*pelf_buffer, *pelf_size); 254 return true; 255} 256 257void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr) 258{ 259 llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass()); 260} 261 262LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op, 263 LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope) 264{ 265 llvm::AtomicRMWInst::BinOp binop; 266 switch (op) { 267 case LLVMAtomicRMWBinOpXchg: 268 binop = llvm::AtomicRMWInst::Xchg; 269 break; 270 case LLVMAtomicRMWBinOpAdd: 271 binop = llvm::AtomicRMWInst::Add; 272 break; 273 case LLVMAtomicRMWBinOpSub: 274 binop = llvm::AtomicRMWInst::Sub; 275 break; 276 case LLVMAtomicRMWBinOpAnd: 277 binop = llvm::AtomicRMWInst::And; 278 break; 279 case LLVMAtomicRMWBinOpNand: 280 binop = llvm::AtomicRMWInst::Nand; 281 break; 282 case LLVMAtomicRMWBinOpOr: 283 binop = llvm::AtomicRMWInst::Or; 284 break; 285 case LLVMAtomicRMWBinOpXor: 286 binop = llvm::AtomicRMWInst::Xor; 287 break; 288 case LLVMAtomicRMWBinOpMax: 289 binop = llvm::AtomicRMWInst::Max; 290 break; 291 case LLVMAtomicRMWBinOpMin: 292 binop = llvm::AtomicRMWInst::Min; 293 break; 294 case LLVMAtomicRMWBinOpUMax: 295 binop = llvm::AtomicRMWInst::UMax; 296 break; 297 case LLVMAtomicRMWBinOpUMin: 298 binop = llvm::AtomicRMWInst::UMin; 299 break; 300 case LLVMAtomicRMWBinOpFAdd: 301 binop = llvm::AtomicRMWInst::FAdd; 302 break; 303 default: 304 unreachable("invalid LLVMAtomicRMWBinOp"); 305 break; 306 } 307 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); 308 return llvm::wrap(llvm::unwrap(ctx->builder) 309 ->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val), 310#if LLVM_VERSION_MAJOR >= 13 311 llvm::MaybeAlign(0), 312#endif 313 llvm::AtomicOrdering::SequentiallyConsistent, SSID)); 314} 315 316LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr, 317 LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope) 318{ 319 unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope); 320 return llvm::wrap(llvm::unwrap(ctx->builder) 321 ->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp), 322 llvm::unwrap(val), 323#if LLVM_VERSION_MAJOR >= 13 324 llvm::MaybeAlign(0), 325#endif 326 llvm::AtomicOrdering::SequentiallyConsistent, 327 llvm::AtomicOrdering::SequentiallyConsistent, SSID)); 328} 329