1//
2// Copyright 2012-2016 Francisco Jerez
3// Copyright 2012-2016 Advanced Micro Devices, Inc.
4// Copyright 2015 Zoltan Gilian
5//
6// Permission is hereby granted, free of charge, to any person obtaining a
7// copy of this software and associated documentation files (the "Software"),
8// to deal in the Software without restriction, including without limitation
9// the rights to use, copy, modify, merge, publish, distribute, sublicense,
10// and/or sell copies of the Software, and to permit persons to whom the
11// Software is furnished to do so, subject to the following conditions:
12//
13// The above copyright notice and this permission notice shall be included in
14// all copies or substantial portions of the Software.
15//
16// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22// OTHER DEALINGS IN THE SOFTWARE.
23//
24
25///
26/// \file
27/// Codegen back-end-independent part of the construction of an executable
28/// clover::binary, including kernel argument metadata extraction and
29/// formatting of the pre-generated binary code in a form that can be
30/// understood by pipe drivers.
31///
32
33#include <llvm/IR/Type.h>
34#include <llvm/Support/Allocator.h>
35
36#include "llvm/codegen.hpp"
37#include "llvm/metadata.hpp"
38
39#include "CL/cl.h"
40
41#include "pipe/p_state.h"
42#include "util/u_math.h"
43
44#include <clang/Basic/TargetInfo.h>
45
46using clover::binary;
47using clover::detokenize;
48using namespace clover::llvm;
49
50using ::llvm::Module;
51using ::llvm::Function;
52using ::llvm::Type;
53using ::llvm::isa;
54using ::llvm::cast;
55using ::llvm::dyn_cast;
56
57namespace {
58   enum binary::argument::type
59   get_image_type(const std::string &type,
60                  const std::string &qual) {
61      if (type == "image1d_t" || type == "image2d_t" || type == "image3d_t") {
62         if (qual == "read_only")
63            return binary::argument::image_rd;
64         else if (qual == "write_only")
65            return binary::argument::image_wr;
66      }
67
68      unreachable("Unsupported image type");
69   }
70
71   binary::arg_info create_arg_info(const std::string &arg_name,
72                                    const std::string &type_name,
73                                    const std::string &type_qualifier,
74                                    const uint64_t address_qualifier,
75                                    const std::string &access_qualifier) {
76
77      cl_kernel_arg_type_qualifier cl_type_qualifier =
78                                                   CL_KERNEL_ARG_TYPE_NONE;
79      if (type_qualifier.find("const") != std::string::npos)
80         cl_type_qualifier |= CL_KERNEL_ARG_TYPE_CONST;
81      if (type_qualifier.find("restrict") != std::string::npos)
82         cl_type_qualifier |=  CL_KERNEL_ARG_TYPE_RESTRICT;
83      if (type_qualifier.find("volatile") != std::string::npos)
84         cl_type_qualifier |=  CL_KERNEL_ARG_TYPE_VOLATILE;
85
86      cl_kernel_arg_address_qualifier cl_address_qualifier =
87                                             CL_KERNEL_ARG_ADDRESS_PRIVATE;
88      if (address_qualifier == 1)
89         cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
90      else if (address_qualifier == 2)
91         cl_address_qualifier =  CL_KERNEL_ARG_ADDRESS_CONSTANT;
92      else if (address_qualifier == 3)
93         cl_address_qualifier =  CL_KERNEL_ARG_ADDRESS_LOCAL;
94
95      cl_kernel_arg_access_qualifier cl_access_qualifier =
96                                                   CL_KERNEL_ARG_ACCESS_NONE;
97      if (access_qualifier == "read_only")
98         cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_ONLY;
99      else if (access_qualifier == "write_only")
100         cl_access_qualifier = CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
101      else if (access_qualifier == "read_write")
102         cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_WRITE;
103
104      return binary::arg_info(arg_name, type_name, cl_type_qualifier,
105                              cl_address_qualifier, cl_access_qualifier);
106   }
107
108   std::vector<size_t>
109   get_reqd_work_group_size(const Module &mod,
110                            const std::string &kernel_name) {
111      const Function &f = *mod.getFunction(kernel_name);
112      auto vector_metadata = get_uint_vector_kernel_metadata(f, "reqd_work_group_size");
113
114      return vector_metadata.empty() ? std::vector<size_t>({0, 0, 0}) : vector_metadata;
115   }
116
117
118   std::string
119   kernel_attributes(const Module &mod, const std::string &kernel_name) {
120      std::vector<std::string> attributes;
121
122      const Function &f = *mod.getFunction(kernel_name);
123
124      auto vec_type_hint = get_type_kernel_metadata(f, "vec_type_hint");
125      if (!vec_type_hint.empty())
126         attributes.emplace_back("vec_type_hint(" + vec_type_hint + ")");
127
128      auto work_group_size_hint = get_uint_vector_kernel_metadata(f, "work_group_size_hint");
129      if (!work_group_size_hint.empty()) {
130         std::string s = "work_group_size_hint(";
131         s += detokenize(work_group_size_hint, ",");
132         s += ")";
133         attributes.emplace_back(s);
134      }
135
136      auto reqd_work_group_size = get_uint_vector_kernel_metadata(f, "reqd_work_group_size");
137      if (!reqd_work_group_size.empty()) {
138         std::string s = "reqd_work_group_size(";
139         s += detokenize(reqd_work_group_size, ",");
140         s += ")";
141         attributes.emplace_back(s);
142      }
143
144      auto nosvm = get_str_kernel_metadata(f, "nosvm");
145      if (!nosvm.empty())
146         attributes.emplace_back("nosvm");
147
148      return detokenize(attributes, " ");
149   }
150
151   std::vector<binary::argument>
152   make_kernel_args(const Module &mod, const std::string &kernel_name,
153                    const clang::CompilerInstance &c) {
154      std::vector<binary::argument> args;
155      const Function &f = *mod.getFunction(kernel_name);
156      ::llvm::DataLayout dl(&mod);
157      const auto size_type =
158         dl.getSmallestLegalIntType(mod.getContext(), sizeof(cl_uint) * 8);
159
160      for (const auto &arg : f.args()) {
161         const auto arg_type = arg.getType();
162
163         // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data
164         // type that is not a power of two bytes in size must be
165         // aligned to the next larger power of two.
166         // This rule applies to built-in types only, not structs or unions."
167         const unsigned arg_api_size = dl.getTypeAllocSize(arg_type);
168
169         const unsigned target_size = dl.getTypeStoreSize(arg_type);
170         const unsigned target_align = dl.getABITypeAlignment(arg_type);
171
172         const auto type_name = get_str_argument_metadata(f, arg,
173                                                          "kernel_arg_type");
174         if (type_name == "image2d_t" || type_name == "image3d_t") {
175            // Image.
176            const auto access_qual = get_str_argument_metadata(
177               f, arg, "kernel_arg_access_qual");
178            args.emplace_back(get_image_type(type_name, access_qual),
179                              target_size, target_size,
180                              target_align, binary::argument::zero_ext);
181
182         } else if (type_name == "sampler_t") {
183            args.emplace_back(binary::argument::sampler, arg_api_size,
184                              target_size, target_align,
185                              binary::argument::zero_ext);
186
187         } else if (type_name == "__llvm_image_size") {
188            // Image size implicit argument.
189            args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
190                              dl.getTypeStoreSize(size_type),
191                              dl.getABITypeAlignment(size_type),
192                              binary::argument::zero_ext,
193                              binary::argument::image_size);
194
195         } else if (type_name == "__llvm_image_format") {
196            // Image format implicit argument.
197            args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
198                              dl.getTypeStoreSize(size_type),
199                              dl.getABITypeAlignment(size_type),
200                              binary::argument::zero_ext,
201                              binary::argument::image_format);
202
203         } else {
204            // Other types.
205            const auto actual_type =
206               isa< ::llvm::PointerType>(arg_type) && arg.hasByValAttr() ?
207               cast< ::llvm::PointerType>(arg_type)->getPointerElementType() : arg_type;
208
209            if (actual_type->isPointerTy()) {
210               const unsigned address_space =
211                  cast< ::llvm::PointerType>(actual_type)->getAddressSpace();
212
213               const auto &map = c.getTarget().getAddressSpaceMap();
214               const auto offset =
215                           static_cast<unsigned>(clang::LangAS::opencl_local);
216               if (address_space == map[offset]) {
217                  const auto pointee_type = cast<
218                     ::llvm::PointerType>(actual_type)->getPointerElementType();
219                  args.emplace_back(binary::argument::local, arg_api_size,
220                                    target_size,
221                                    dl.getABITypeAlignment(pointee_type),
222                                    binary::argument::zero_ext);
223               } else {
224                  // XXX: Correctly handle constant address space.  There is no
225                  // way for r600g to pass a handle for constant buffers back
226                  // to clover like it can for global buffers, so
227                  // creating constant arguments will break r600g.  For now,
228                  // continue treating constant buffers as global buffers
229                  // until we can come up with a way to create handles for
230                  // constant buffers.
231                  args.emplace_back(binary::argument::global, arg_api_size,
232                                    target_size, target_align,
233                                    binary::argument::zero_ext);
234               }
235
236            } else {
237               const bool needs_sign_ext = f.getAttributes().hasParamAttr(
238                  arg.getArgNo(), ::llvm::Attribute::SExt);
239
240               args.emplace_back(binary::argument::scalar, arg_api_size,
241                                 target_size, target_align,
242                                 (needs_sign_ext ? binary::argument::sign_ext :
243                                  binary::argument::zero_ext));
244            }
245
246            // Add kernel argument infos if built with -cl-kernel-arg-info.
247            if (c.getCodeGenOpts().EmitOpenCLArgMetadata) {
248               args.back().info = create_arg_info(
249                  get_str_argument_metadata(f, arg, "kernel_arg_name"),
250                  type_name,
251                  get_str_argument_metadata(f, arg, "kernel_arg_type_qual"),
252                  get_uint_argument_metadata(f, arg, "kernel_arg_addr_space"),
253                  get_str_argument_metadata(f, arg, "kernel_arg_access_qual"));
254            }
255         }
256      }
257
258      // Append implicit arguments.  XXX - The types, ordering and
259      // vector size of the implicit arguments should depend on the
260      // target according to the selected calling convention.
261      args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
262                        dl.getTypeStoreSize(size_type),
263                        dl.getABITypeAlignment(size_type),
264                        binary::argument::zero_ext,
265                        binary::argument::grid_dimension);
266
267      args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
268                        dl.getTypeStoreSize(size_type),
269                        dl.getABITypeAlignment(size_type),
270                        binary::argument::zero_ext,
271                        binary::argument::grid_offset);
272
273      return args;
274   }
275
276   binary::section
277   make_text_section(const std::vector<char> &code) {
278      const pipe_binary_program_header header { uint32_t(code.size()) };
279      binary::section text { 0, binary::section::text_executable,
280                             header.num_bytes, {} };
281
282      text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header),
283                       reinterpret_cast<const char *>(&header) + sizeof(header));
284      text.data.insert(text.data.end(), code.begin(), code.end());
285
286      return text;
287   }
288}
289
290binary
291clover::llvm::build_module_common(const Module &mod,
292                                  const std::vector<char> &code,
293                                  const std::map<std::string,
294                                                 unsigned> &offsets,
295                                  const clang::CompilerInstance &c) {
296   binary b;
297
298   for (const auto &llvm_name : map(std::mem_fn(&Function::getName),
299                               get_kernels(mod))) {
300      const ::std::string name(llvm_name);
301      if (offsets.count(name))
302         b.syms.emplace_back(name, kernel_attributes(mod, name),
303                             get_reqd_work_group_size(mod, name),
304                             0, offsets.at(name),
305                             make_kernel_args(mod, name, c));
306   }
307
308   b.secs.push_back(make_text_section(code));
309   return b;
310}
311