1 //
2 // Copyright 2012-2016 Francisco Jerez
3 // Copyright 2012-2016 Advanced Micro Devices, Inc.
4 // Copyright 2015 Zoltan Gilian
5 //
6 // Permission is hereby granted, free of charge, to any person obtaining a
7 // copy of this software and associated documentation files (the "Software"),
8 // to deal in the Software without restriction, including without limitation
9 // the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 // and/or sell copies of the Software, and to permit persons to whom the
11 // Software is furnished to do so, subject to the following conditions:
12 //
13 // The above copyright notice and this permission notice shall be included in
14 // all copies or substantial portions of the Software.
15 //
16 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19 // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 // OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 // OTHER DEALINGS IN THE SOFTWARE.
23 //
24 
25 ///
26 /// \file
27 /// Codegen back-end-independent part of the construction of an executable
28 /// clover::binary, including kernel argument metadata extraction and
29 /// formatting of the pre-generated binary code in a form that can be
30 /// understood by pipe drivers.
31 ///
32 
33 #include <llvm/IR/Type.h>
34 #include <llvm/Support/Allocator.h>
35 
36 #include "llvm/codegen.hpp"
37 #include "llvm/metadata.hpp"
38 
39 #include "CL/cl.h"
40 
41 #include "pipe/p_state.h"
42 #include "util/u_math.h"
43 
44 #include <clang/Basic/TargetInfo.h>
45 
46 using clover::binary;
47 using clover::detokenize;
48 using namespace clover::llvm;
49 
50 using ::llvm::Module;
51 using ::llvm::Function;
52 using ::llvm::Type;
53 using ::llvm::isa;
54 using ::llvm::cast;
55 using ::llvm::dyn_cast;
56 
57 namespace {
58    enum binary::argument::type
get_image_type(const std::string &type, const std::string &qual)59    get_image_type(const std::string &type,
60                   const std::string &qual) {
61       if (type == "image1d_t" || type == "image2d_t" || type == "image3d_t") {
62          if (qual == "read_only")
63             return binary::argument::image_rd;
64          else if (qual == "write_only")
65             return binary::argument::image_wr;
66       }
67 
68       unreachable("Unsupported image type");
69    }
70 
create_arg_info(const std::string &arg_name, const std::string &type_name, const std::string &type_qualifier, const uint64_t address_qualifier, const std::string &access_qualifier)71    binary::arg_info create_arg_info(const std::string &arg_name,
72                                     const std::string &type_name,
73                                     const std::string &type_qualifier,
74                                     const uint64_t address_qualifier,
75                                     const std::string &access_qualifier) {
76 
77       cl_kernel_arg_type_qualifier cl_type_qualifier =
78                                                    CL_KERNEL_ARG_TYPE_NONE;
79       if (type_qualifier.find("const") != std::string::npos)
80          cl_type_qualifier |= CL_KERNEL_ARG_TYPE_CONST;
81       if (type_qualifier.find("restrict") != std::string::npos)
82          cl_type_qualifier |=  CL_KERNEL_ARG_TYPE_RESTRICT;
83       if (type_qualifier.find("volatile") != std::string::npos)
84          cl_type_qualifier |=  CL_KERNEL_ARG_TYPE_VOLATILE;
85 
86       cl_kernel_arg_address_qualifier cl_address_qualifier =
87                                              CL_KERNEL_ARG_ADDRESS_PRIVATE;
88       if (address_qualifier == 1)
89          cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL;
90       else if (address_qualifier == 2)
91          cl_address_qualifier =  CL_KERNEL_ARG_ADDRESS_CONSTANT;
92       else if (address_qualifier == 3)
93          cl_address_qualifier =  CL_KERNEL_ARG_ADDRESS_LOCAL;
94 
95       cl_kernel_arg_access_qualifier cl_access_qualifier =
96                                                    CL_KERNEL_ARG_ACCESS_NONE;
97       if (access_qualifier == "read_only")
98          cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_ONLY;
99       else if (access_qualifier == "write_only")
100          cl_access_qualifier = CL_KERNEL_ARG_ACCESS_WRITE_ONLY;
101       else if (access_qualifier == "read_write")
102          cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_WRITE;
103 
104       return binary::arg_info(arg_name, type_name, cl_type_qualifier,
105                               cl_address_qualifier, cl_access_qualifier);
106    }
107 
108    std::vector<size_t>
get_reqd_work_group_size(const Module &mod, const std::string &kernel_name)109    get_reqd_work_group_size(const Module &mod,
110                             const std::string &kernel_name) {
111       const Function &f = *mod.getFunction(kernel_name);
112       auto vector_metadata = get_uint_vector_kernel_metadata(f, "reqd_work_group_size");
113 
114       return vector_metadata.empty() ? std::vector<size_t>({0, 0, 0}) : vector_metadata;
115    }
116 
117 
118    std::string
kernel_attributes(const Module &mod, const std::string &kernel_name)119    kernel_attributes(const Module &mod, const std::string &kernel_name) {
120       std::vector<std::string> attributes;
121 
122       const Function &f = *mod.getFunction(kernel_name);
123 
124       auto vec_type_hint = get_type_kernel_metadata(f, "vec_type_hint");
125       if (!vec_type_hint.empty())
126          attributes.emplace_back("vec_type_hint(" + vec_type_hint + ")");
127 
128       auto work_group_size_hint = get_uint_vector_kernel_metadata(f, "work_group_size_hint");
129       if (!work_group_size_hint.empty()) {
130          std::string s = "work_group_size_hint(";
131          s += detokenize(work_group_size_hint, ",");
132          s += ")";
133          attributes.emplace_back(s);
134       }
135 
136       auto reqd_work_group_size = get_uint_vector_kernel_metadata(f, "reqd_work_group_size");
137       if (!reqd_work_group_size.empty()) {
138          std::string s = "reqd_work_group_size(";
139          s += detokenize(reqd_work_group_size, ",");
140          s += ")";
141          attributes.emplace_back(s);
142       }
143 
144       auto nosvm = get_str_kernel_metadata(f, "nosvm");
145       if (!nosvm.empty())
146          attributes.emplace_back("nosvm");
147 
148       return detokenize(attributes, " ");
149    }
150 
151    std::vector<binary::argument>
make_kernel_args(const Module &mod, const std::string &kernel_name, const clang::CompilerInstance &c)152    make_kernel_args(const Module &mod, const std::string &kernel_name,
153                     const clang::CompilerInstance &c) {
154       std::vector<binary::argument> args;
155       const Function &f = *mod.getFunction(kernel_name);
156       ::llvm::DataLayout dl(&mod);
157       const auto size_type =
158          dl.getSmallestLegalIntType(mod.getContext(), sizeof(cl_uint) * 8);
159 
160       for (const auto &arg : f.args()) {
161          const auto arg_type = arg.getType();
162 
163          // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data
164          // type that is not a power of two bytes in size must be
165          // aligned to the next larger power of two.
166          // This rule applies to built-in types only, not structs or unions."
167          const unsigned arg_api_size = dl.getTypeAllocSize(arg_type);
168 
169          const unsigned target_size = dl.getTypeStoreSize(arg_type);
170          const unsigned target_align = dl.getABITypeAlignment(arg_type);
171 
172          const auto type_name = get_str_argument_metadata(f, arg,
173                                                           "kernel_arg_type");
174          if (type_name == "image2d_t" || type_name == "image3d_t") {
175             // Image.
176             const auto access_qual = get_str_argument_metadata(
177                f, arg, "kernel_arg_access_qual");
178             args.emplace_back(get_image_type(type_name, access_qual),
179                               target_size, target_size,
180                               target_align, binary::argument::zero_ext);
181 
182          } else if (type_name == "sampler_t") {
183             args.emplace_back(binary::argument::sampler, arg_api_size,
184                               target_size, target_align,
185                               binary::argument::zero_ext);
186 
187          } else if (type_name == "__llvm_image_size") {
188             // Image size implicit argument.
189             args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
190                               dl.getTypeStoreSize(size_type),
191                               dl.getABITypeAlignment(size_type),
192                               binary::argument::zero_ext,
193                               binary::argument::image_size);
194 
195          } else if (type_name == "__llvm_image_format") {
196             // Image format implicit argument.
197             args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
198                               dl.getTypeStoreSize(size_type),
199                               dl.getABITypeAlignment(size_type),
200                               binary::argument::zero_ext,
201                               binary::argument::image_format);
202 
203          } else {
204             // Other types.
205             const auto actual_type =
206                isa< ::llvm::PointerType>(arg_type) && arg.hasByValAttr() ?
207                cast< ::llvm::PointerType>(arg_type)->getPointerElementType() : arg_type;
208 
209             if (actual_type->isPointerTy()) {
210                const unsigned address_space =
211                   cast< ::llvm::PointerType>(actual_type)->getAddressSpace();
212 
213                const auto &map = c.getTarget().getAddressSpaceMap();
214                const auto offset =
215                            static_cast<unsigned>(clang::LangAS::opencl_local);
216                if (address_space == map[offset]) {
217                   const auto pointee_type = cast<
218                      ::llvm::PointerType>(actual_type)->getPointerElementType();
219                   args.emplace_back(binary::argument::local, arg_api_size,
220                                     target_size,
221                                     dl.getABITypeAlignment(pointee_type),
222                                     binary::argument::zero_ext);
223                } else {
224                   // XXX: Correctly handle constant address space.  There is no
225                   // way for r600g to pass a handle for constant buffers back
226                   // to clover like it can for global buffers, so
227                   // creating constant arguments will break r600g.  For now,
228                   // continue treating constant buffers as global buffers
229                   // until we can come up with a way to create handles for
230                   // constant buffers.
231                   args.emplace_back(binary::argument::global, arg_api_size,
232                                     target_size, target_align,
233                                     binary::argument::zero_ext);
234                }
235 
236             } else {
237                const bool needs_sign_ext = f.getAttributes().hasParamAttr(
238                   arg.getArgNo(), ::llvm::Attribute::SExt);
239 
240                args.emplace_back(binary::argument::scalar, arg_api_size,
241                                  target_size, target_align,
242                                  (needs_sign_ext ? binary::argument::sign_ext :
243                                   binary::argument::zero_ext));
244             }
245 
246             // Add kernel argument infos if built with -cl-kernel-arg-info.
247             if (c.getCodeGenOpts().EmitOpenCLArgMetadata) {
248                args.back().info = create_arg_info(
249                   get_str_argument_metadata(f, arg, "kernel_arg_name"),
250                   type_name,
251                   get_str_argument_metadata(f, arg, "kernel_arg_type_qual"),
252                   get_uint_argument_metadata(f, arg, "kernel_arg_addr_space"),
253                   get_str_argument_metadata(f, arg, "kernel_arg_access_qual"));
254             }
255          }
256       }
257 
258       // Append implicit arguments.  XXX - The types, ordering and
259       // vector size of the implicit arguments should depend on the
260       // target according to the selected calling convention.
261       args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
262                         dl.getTypeStoreSize(size_type),
263                         dl.getABITypeAlignment(size_type),
264                         binary::argument::zero_ext,
265                         binary::argument::grid_dimension);
266 
267       args.emplace_back(binary::argument::scalar, sizeof(cl_uint),
268                         dl.getTypeStoreSize(size_type),
269                         dl.getABITypeAlignment(size_type),
270                         binary::argument::zero_ext,
271                         binary::argument::grid_offset);
272 
273       return args;
274    }
275 
276    binary::section
make_text_section(const std::vector<char> &code)277    make_text_section(const std::vector<char> &code) {
278       const pipe_binary_program_header header { uint32_t(code.size()) };
279       binary::section text { 0, binary::section::text_executable,
280                              header.num_bytes, {} };
281 
282       text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header),
283                        reinterpret_cast<const char *>(&header) + sizeof(header));
284       text.data.insert(text.data.end(), code.begin(), code.end());
285 
286       return text;
287    }
288 }
289 
290 binary
build_module_common(const Module &mod, const std::vector<char> &code, const std::map<std::string, unsigned> &offsets, const clang::CompilerInstance &c)291 clover::llvm::build_module_common(const Module &mod,
292                                   const std::vector<char> &code,
293                                   const std::map<std::string,
294                                                  unsigned> &offsets,
295                                   const clang::CompilerInstance &c) {
296    binary b;
297 
298    for (const auto &llvm_name : map(std::mem_fn(&Function::getName),
299                                get_kernels(mod))) {
300       const ::std::string name(llvm_name);
301       if (offsets.count(name))
302          b.syms.emplace_back(name, kernel_attributes(mod, name),
303                              get_reqd_work_group_size(mod, name),
304                              0, offsets.at(name),
305                              make_kernel_args(mod, name, c));
306    }
307 
308    b.secs.push_back(make_text_section(code));
309    return b;
310 }
311