1/*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25
26#include <llvm-c/Core.h>
27#include <llvm/Analysis/TargetLibraryInfo.h>
28#include <llvm/IR/IRBuilder.h>
29#include <llvm/IR/LegacyPassManager.h>
30#include <llvm/Target/TargetMachine.h>
31#include <llvm/MC/MCSubtargetInfo.h>
32#include <llvm/Support/CommandLine.h>
33#include <llvm/Transforms/IPO.h>
34
35#include <cstring>
36
37/* DO NOT REORDER THE HEADERS
38 * The LLVM headers need to all be included before any Mesa header,
39 * as they use the `restrict` keyword in ways that are incompatible
40 * with our #define in include/c99_compat.h
41 */
42
43#include "ac_binary.h"
44#include "ac_llvm_util.h"
45#include "ac_llvm_build.h"
46#include "util/macros.h"
47
48bool ac_is_llvm_processor_supported(LLVMTargetMachineRef tm, const char *processor)
49{
50   llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
51   return TM->getMCSubtargetInfo()->isCPUStringValid(processor);
52}
53
54void ac_reset_llvm_all_options_occurences()
55{
56   llvm::cl::ResetAllOptionOccurrences();
57}
58
59void ac_add_attr_dereferenceable(LLVMValueRef val, uint64_t bytes)
60{
61   llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
62   A->addAttr(llvm::Attribute::getWithDereferenceableBytes(A->getContext(), bytes));
63}
64
65void ac_add_attr_alignment(LLVMValueRef val, uint64_t bytes)
66{
67   llvm::Argument *A = llvm::unwrap<llvm::Argument>(val);
68   A->addAttr(llvm::Attribute::getWithAlignment(A->getContext(), llvm::Align(bytes)));
69}
70
71bool ac_is_sgpr_param(LLVMValueRef arg)
72{
73   llvm::Argument *A = llvm::unwrap<llvm::Argument>(arg);
74   llvm::AttributeList AS = A->getParent()->getAttributes();
75   unsigned ArgNo = A->getArgNo();
76   return AS.hasParamAttr(ArgNo, llvm::Attribute::InReg);
77}
78
79LLVMModuleRef ac_create_module(LLVMTargetMachineRef tm, LLVMContextRef ctx)
80{
81   llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
82   LLVMModuleRef module = LLVMModuleCreateWithNameInContext("mesa-shader", ctx);
83
84   llvm::unwrap(module)->setTargetTriple(TM->getTargetTriple().getTriple());
85   llvm::unwrap(module)->setDataLayout(TM->createDataLayout());
86   return module;
87}
88
89LLVMBuilderRef ac_create_builder(LLVMContextRef ctx, enum ac_float_mode float_mode)
90{
91   LLVMBuilderRef builder = LLVMCreateBuilderInContext(ctx);
92
93   llvm::FastMathFlags flags;
94
95   switch (float_mode) {
96   case AC_FLOAT_MODE_DEFAULT:
97   case AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO:
98      break;
99
100   case AC_FLOAT_MODE_DEFAULT_OPENGL:
101      /* Allow optimizations to treat the sign of a zero argument or
102       * result as insignificant.
103       */
104      flags.setNoSignedZeros(); /* nsz */
105
106      /* Allow optimizations to use the reciprocal of an argument
107       * rather than perform division.
108       */
109      flags.setAllowReciprocal(); /* arcp */
110
111      llvm::unwrap(builder)->setFastMathFlags(flags);
112      break;
113   }
114
115   return builder;
116}
117
118void ac_enable_signed_zeros(struct ac_llvm_context *ctx)
119{
120   if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
121      auto *b = llvm::unwrap(ctx->builder);
122      llvm::FastMathFlags flags = b->getFastMathFlags();
123
124      /* This disables the optimization of (x + 0), which is used
125       * to convert negative zero to positive zero.
126       */
127      flags.setNoSignedZeros(false);
128      b->setFastMathFlags(flags);
129   }
130}
131
132void ac_disable_signed_zeros(struct ac_llvm_context *ctx)
133{
134   if (ctx->float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL) {
135      auto *b = llvm::unwrap(ctx->builder);
136      llvm::FastMathFlags flags = b->getFastMathFlags();
137
138      flags.setNoSignedZeros();
139      b->setFastMathFlags(flags);
140   }
141}
142
143LLVMTargetLibraryInfoRef ac_create_target_library_info(const char *triple)
144{
145   return reinterpret_cast<LLVMTargetLibraryInfoRef>(
146      new llvm::TargetLibraryInfoImpl(llvm::Triple(triple)));
147}
148
149void ac_dispose_target_library_info(LLVMTargetLibraryInfoRef library_info)
150{
151   delete reinterpret_cast<llvm::TargetLibraryInfoImpl *>(library_info);
152}
153
154/* Implementation of raw_pwrite_stream that works on malloc()ed memory for
155 * better compatibility with C code. */
156struct raw_memory_ostream : public llvm::raw_pwrite_stream {
157   char *buffer;
158   size_t written;
159   size_t bufsize;
160
161   raw_memory_ostream()
162   {
163      buffer = NULL;
164      written = 0;
165      bufsize = 0;
166      SetUnbuffered();
167   }
168
169   ~raw_memory_ostream()
170   {
171      free(buffer);
172   }
173
174   void clear()
175   {
176      written = 0;
177   }
178
179   void take(char *&out_buffer, size_t &out_size)
180   {
181      out_buffer = buffer;
182      out_size = written;
183      buffer = NULL;
184      written = 0;
185      bufsize = 0;
186   }
187
188   void flush() = delete;
189
190   void write_impl(const char *ptr, size_t size) override
191   {
192      if (unlikely(written + size < written))
193         abort();
194      if (written + size > bufsize) {
195         bufsize = MAX3(1024, written + size, bufsize / 3 * 4);
196         buffer = (char *)realloc(buffer, bufsize);
197         if (!buffer) {
198            fprintf(stderr, "amd: out of memory allocating ELF buffer\n");
199            abort();
200         }
201      }
202      memcpy(buffer + written, ptr, size);
203      written += size;
204   }
205
206   void pwrite_impl(const char *ptr, size_t size, uint64_t offset) override
207   {
208      assert(offset == (size_t)offset && offset + size >= offset && offset + size <= written);
209      memcpy(buffer + offset, ptr, size);
210   }
211
212   uint64_t current_pos() const override
213   {
214      return written;
215   }
216};
217
218/* The LLVM compiler is represented as a pass manager containing passes for
219 * optimizations, instruction selection, and code generation.
220 */
221struct ac_compiler_passes {
222   raw_memory_ostream ostream;        /* ELF shader binary stream */
223   llvm::legacy::PassManager passmgr; /* list of passes */
224};
225
226struct ac_compiler_passes *ac_create_llvm_passes(LLVMTargetMachineRef tm)
227{
228   struct ac_compiler_passes *p = new ac_compiler_passes();
229   if (!p)
230      return NULL;
231
232   llvm::TargetMachine *TM = reinterpret_cast<llvm::TargetMachine *>(tm);
233
234   if (TM->addPassesToEmitFile(p->passmgr, p->ostream, nullptr,
235                               llvm::CGFT_ObjectFile)) {
236      fprintf(stderr, "amd: TargetMachine can't emit a file of this type!\n");
237      delete p;
238      return NULL;
239   }
240   return p;
241}
242
/* Destroy a pass container created by ac_create_llvm_passes(). */
void ac_destroy_llvm_passes(struct ac_compiler_passes *p)
{
   /* delete on a null pointer is a no-op, so NULL is accepted. */
   struct ac_compiler_passes *passes = p;

   delete passes;
}
247
248/* This returns false on failure. */
249bool ac_compile_module_to_elf(struct ac_compiler_passes *p, LLVMModuleRef module,
250                              char **pelf_buffer, size_t *pelf_size)
251{
252   p->passmgr.run(*llvm::unwrap(module));
253   p->ostream.take(*pelf_buffer, *pelf_size);
254   return true;
255}
256
257void ac_llvm_add_barrier_noop_pass(LLVMPassManagerRef passmgr)
258{
259   llvm::unwrap(passmgr)->add(llvm::createBarrierNoopPass());
260}
261
262LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
263                                 LLVMValueRef ptr, LLVMValueRef val, const char *sync_scope)
264{
265   llvm::AtomicRMWInst::BinOp binop;
266   switch (op) {
267   case LLVMAtomicRMWBinOpXchg:
268      binop = llvm::AtomicRMWInst::Xchg;
269      break;
270   case LLVMAtomicRMWBinOpAdd:
271      binop = llvm::AtomicRMWInst::Add;
272      break;
273   case LLVMAtomicRMWBinOpSub:
274      binop = llvm::AtomicRMWInst::Sub;
275      break;
276   case LLVMAtomicRMWBinOpAnd:
277      binop = llvm::AtomicRMWInst::And;
278      break;
279   case LLVMAtomicRMWBinOpNand:
280      binop = llvm::AtomicRMWInst::Nand;
281      break;
282   case LLVMAtomicRMWBinOpOr:
283      binop = llvm::AtomicRMWInst::Or;
284      break;
285   case LLVMAtomicRMWBinOpXor:
286      binop = llvm::AtomicRMWInst::Xor;
287      break;
288   case LLVMAtomicRMWBinOpMax:
289      binop = llvm::AtomicRMWInst::Max;
290      break;
291   case LLVMAtomicRMWBinOpMin:
292      binop = llvm::AtomicRMWInst::Min;
293      break;
294   case LLVMAtomicRMWBinOpUMax:
295      binop = llvm::AtomicRMWInst::UMax;
296      break;
297   case LLVMAtomicRMWBinOpUMin:
298      binop = llvm::AtomicRMWInst::UMin;
299      break;
300   case LLVMAtomicRMWBinOpFAdd:
301      binop = llvm::AtomicRMWInst::FAdd;
302      break;
303   default:
304      unreachable("invalid LLVMAtomicRMWBinOp");
305      break;
306   }
307   unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
308   return llvm::wrap(llvm::unwrap(ctx->builder)
309                        ->CreateAtomicRMW(binop, llvm::unwrap(ptr), llvm::unwrap(val),
310#if LLVM_VERSION_MAJOR >= 13
311                                          llvm::MaybeAlign(0),
312#endif
313                                          llvm::AtomicOrdering::SequentiallyConsistent, SSID));
314}
315
316LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
317                                      LLVMValueRef cmp, LLVMValueRef val, const char *sync_scope)
318{
319   unsigned SSID = llvm::unwrap(ctx->context)->getOrInsertSyncScopeID(sync_scope);
320   return llvm::wrap(llvm::unwrap(ctx->builder)
321                        ->CreateAtomicCmpXchg(llvm::unwrap(ptr), llvm::unwrap(cmp),
322                                              llvm::unwrap(val),
323#if LLVM_VERSION_MAJOR >= 13
324                                              llvm::MaybeAlign(0),
325#endif
326                                              llvm::AtomicOrdering::SequentiallyConsistent,
327                                              llvm::AtomicOrdering::SequentiallyConsistent, SSID));
328}
329