//
// Copyright 2012-2016 Francisco Jerez
// Copyright 2012-2016 Advanced Micro Devices, Inc.
// Copyright 2015 Zoltan Gilian
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

///
/// \file
/// Codegen back-end-independent part of the construction of an executable
/// clover::binary, including kernel argument metadata extraction and
/// formatting of the pre-generated binary code in a form that can be
/// understood by pipe drivers.
31/// 32 33#include <llvm/IR/Type.h> 34#include <llvm/Support/Allocator.h> 35 36#include "llvm/codegen.hpp" 37#include "llvm/metadata.hpp" 38 39#include "CL/cl.h" 40 41#include "pipe/p_state.h" 42#include "util/u_math.h" 43 44#include <clang/Basic/TargetInfo.h> 45 46using clover::binary; 47using clover::detokenize; 48using namespace clover::llvm; 49 50using ::llvm::Module; 51using ::llvm::Function; 52using ::llvm::Type; 53using ::llvm::isa; 54using ::llvm::cast; 55using ::llvm::dyn_cast; 56 57namespace { 58 enum binary::argument::type 59 get_image_type(const std::string &type, 60 const std::string &qual) { 61 if (type == "image1d_t" || type == "image2d_t" || type == "image3d_t") { 62 if (qual == "read_only") 63 return binary::argument::image_rd; 64 else if (qual == "write_only") 65 return binary::argument::image_wr; 66 } 67 68 unreachable("Unsupported image type"); 69 } 70 71 binary::arg_info create_arg_info(const std::string &arg_name, 72 const std::string &type_name, 73 const std::string &type_qualifier, 74 const uint64_t address_qualifier, 75 const std::string &access_qualifier) { 76 77 cl_kernel_arg_type_qualifier cl_type_qualifier = 78 CL_KERNEL_ARG_TYPE_NONE; 79 if (type_qualifier.find("const") != std::string::npos) 80 cl_type_qualifier |= CL_KERNEL_ARG_TYPE_CONST; 81 if (type_qualifier.find("restrict") != std::string::npos) 82 cl_type_qualifier |= CL_KERNEL_ARG_TYPE_RESTRICT; 83 if (type_qualifier.find("volatile") != std::string::npos) 84 cl_type_qualifier |= CL_KERNEL_ARG_TYPE_VOLATILE; 85 86 cl_kernel_arg_address_qualifier cl_address_qualifier = 87 CL_KERNEL_ARG_ADDRESS_PRIVATE; 88 if (address_qualifier == 1) 89 cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_GLOBAL; 90 else if (address_qualifier == 2) 91 cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_CONSTANT; 92 else if (address_qualifier == 3) 93 cl_address_qualifier = CL_KERNEL_ARG_ADDRESS_LOCAL; 94 95 cl_kernel_arg_access_qualifier cl_access_qualifier = 96 CL_KERNEL_ARG_ACCESS_NONE; 97 if (access_qualifier 
== "read_only") 98 cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_ONLY; 99 else if (access_qualifier == "write_only") 100 cl_access_qualifier = CL_KERNEL_ARG_ACCESS_WRITE_ONLY; 101 else if (access_qualifier == "read_write") 102 cl_access_qualifier = CL_KERNEL_ARG_ACCESS_READ_WRITE; 103 104 return binary::arg_info(arg_name, type_name, cl_type_qualifier, 105 cl_address_qualifier, cl_access_qualifier); 106 } 107 108 std::vector<size_t> 109 get_reqd_work_group_size(const Module &mod, 110 const std::string &kernel_name) { 111 const Function &f = *mod.getFunction(kernel_name); 112 auto vector_metadata = get_uint_vector_kernel_metadata(f, "reqd_work_group_size"); 113 114 return vector_metadata.empty() ? std::vector<size_t>({0, 0, 0}) : vector_metadata; 115 } 116 117 118 std::string 119 kernel_attributes(const Module &mod, const std::string &kernel_name) { 120 std::vector<std::string> attributes; 121 122 const Function &f = *mod.getFunction(kernel_name); 123 124 auto vec_type_hint = get_type_kernel_metadata(f, "vec_type_hint"); 125 if (!vec_type_hint.empty()) 126 attributes.emplace_back("vec_type_hint(" + vec_type_hint + ")"); 127 128 auto work_group_size_hint = get_uint_vector_kernel_metadata(f, "work_group_size_hint"); 129 if (!work_group_size_hint.empty()) { 130 std::string s = "work_group_size_hint("; 131 s += detokenize(work_group_size_hint, ","); 132 s += ")"; 133 attributes.emplace_back(s); 134 } 135 136 auto reqd_work_group_size = get_uint_vector_kernel_metadata(f, "reqd_work_group_size"); 137 if (!reqd_work_group_size.empty()) { 138 std::string s = "reqd_work_group_size("; 139 s += detokenize(reqd_work_group_size, ","); 140 s += ")"; 141 attributes.emplace_back(s); 142 } 143 144 auto nosvm = get_str_kernel_metadata(f, "nosvm"); 145 if (!nosvm.empty()) 146 attributes.emplace_back("nosvm"); 147 148 return detokenize(attributes, " "); 149 } 150 151 std::vector<binary::argument> 152 make_kernel_args(const Module &mod, const std::string &kernel_name, 153 const 
clang::CompilerInstance &c) { 154 std::vector<binary::argument> args; 155 const Function &f = *mod.getFunction(kernel_name); 156 ::llvm::DataLayout dl(&mod); 157 const auto size_type = 158 dl.getSmallestLegalIntType(mod.getContext(), sizeof(cl_uint) * 8); 159 160 for (const auto &arg : f.args()) { 161 const auto arg_type = arg.getType(); 162 163 // OpenCL 1.2 specification, Ch. 6.1.5: "A built-in data 164 // type that is not a power of two bytes in size must be 165 // aligned to the next larger power of two. 166 // This rule applies to built-in types only, not structs or unions." 167 const unsigned arg_api_size = dl.getTypeAllocSize(arg_type); 168 169 const unsigned target_size = dl.getTypeStoreSize(arg_type); 170 const unsigned target_align = dl.getABITypeAlignment(arg_type); 171 172 const auto type_name = get_str_argument_metadata(f, arg, 173 "kernel_arg_type"); 174 if (type_name == "image2d_t" || type_name == "image3d_t") { 175 // Image. 176 const auto access_qual = get_str_argument_metadata( 177 f, arg, "kernel_arg_access_qual"); 178 args.emplace_back(get_image_type(type_name, access_qual), 179 target_size, target_size, 180 target_align, binary::argument::zero_ext); 181 182 } else if (type_name == "sampler_t") { 183 args.emplace_back(binary::argument::sampler, arg_api_size, 184 target_size, target_align, 185 binary::argument::zero_ext); 186 187 } else if (type_name == "__llvm_image_size") { 188 // Image size implicit argument. 189 args.emplace_back(binary::argument::scalar, sizeof(cl_uint), 190 dl.getTypeStoreSize(size_type), 191 dl.getABITypeAlignment(size_type), 192 binary::argument::zero_ext, 193 binary::argument::image_size); 194 195 } else if (type_name == "__llvm_image_format") { 196 // Image format implicit argument. 
197 args.emplace_back(binary::argument::scalar, sizeof(cl_uint), 198 dl.getTypeStoreSize(size_type), 199 dl.getABITypeAlignment(size_type), 200 binary::argument::zero_ext, 201 binary::argument::image_format); 202 203 } else { 204 // Other types. 205 const auto actual_type = 206 isa< ::llvm::PointerType>(arg_type) && arg.hasByValAttr() ? 207 cast< ::llvm::PointerType>(arg_type)->getPointerElementType() : arg_type; 208 209 if (actual_type->isPointerTy()) { 210 const unsigned address_space = 211 cast< ::llvm::PointerType>(actual_type)->getAddressSpace(); 212 213 const auto &map = c.getTarget().getAddressSpaceMap(); 214 const auto offset = 215 static_cast<unsigned>(clang::LangAS::opencl_local); 216 if (address_space == map[offset]) { 217 const auto pointee_type = cast< 218 ::llvm::PointerType>(actual_type)->getPointerElementType(); 219 args.emplace_back(binary::argument::local, arg_api_size, 220 target_size, 221 dl.getABITypeAlignment(pointee_type), 222 binary::argument::zero_ext); 223 } else { 224 // XXX: Correctly handle constant address space. There is no 225 // way for r600g to pass a handle for constant buffers back 226 // to clover like it can for global buffers, so 227 // creating constant arguments will break r600g. For now, 228 // continue treating constant buffers as global buffers 229 // until we can come up with a way to create handles for 230 // constant buffers. 231 args.emplace_back(binary::argument::global, arg_api_size, 232 target_size, target_align, 233 binary::argument::zero_ext); 234 } 235 236 } else { 237 const bool needs_sign_ext = f.getAttributes().hasParamAttr( 238 arg.getArgNo(), ::llvm::Attribute::SExt); 239 240 args.emplace_back(binary::argument::scalar, arg_api_size, 241 target_size, target_align, 242 (needs_sign_ext ? binary::argument::sign_ext : 243 binary::argument::zero_ext)); 244 } 245 246 // Add kernel argument infos if built with -cl-kernel-arg-info. 
247 if (c.getCodeGenOpts().EmitOpenCLArgMetadata) { 248 args.back().info = create_arg_info( 249 get_str_argument_metadata(f, arg, "kernel_arg_name"), 250 type_name, 251 get_str_argument_metadata(f, arg, "kernel_arg_type_qual"), 252 get_uint_argument_metadata(f, arg, "kernel_arg_addr_space"), 253 get_str_argument_metadata(f, arg, "kernel_arg_access_qual")); 254 } 255 } 256 } 257 258 // Append implicit arguments. XXX - The types, ordering and 259 // vector size of the implicit arguments should depend on the 260 // target according to the selected calling convention. 261 args.emplace_back(binary::argument::scalar, sizeof(cl_uint), 262 dl.getTypeStoreSize(size_type), 263 dl.getABITypeAlignment(size_type), 264 binary::argument::zero_ext, 265 binary::argument::grid_dimension); 266 267 args.emplace_back(binary::argument::scalar, sizeof(cl_uint), 268 dl.getTypeStoreSize(size_type), 269 dl.getABITypeAlignment(size_type), 270 binary::argument::zero_ext, 271 binary::argument::grid_offset); 272 273 return args; 274 } 275 276 binary::section 277 make_text_section(const std::vector<char> &code) { 278 const pipe_binary_program_header header { uint32_t(code.size()) }; 279 binary::section text { 0, binary::section::text_executable, 280 header.num_bytes, {} }; 281 282 text.data.insert(text.data.end(), reinterpret_cast<const char *>(&header), 283 reinterpret_cast<const char *>(&header) + sizeof(header)); 284 text.data.insert(text.data.end(), code.begin(), code.end()); 285 286 return text; 287 } 288} 289 290binary 291clover::llvm::build_module_common(const Module &mod, 292 const std::vector<char> &code, 293 const std::map<std::string, 294 unsigned> &offsets, 295 const clang::CompilerInstance &c) { 296 binary b; 297 298 for (const auto &llvm_name : map(std::mem_fn(&Function::getName), 299 get_kernels(mod))) { 300 const ::std::string name(llvm_name); 301 if (offsets.count(name)) 302 b.syms.emplace_back(name, kernel_attributes(mod, name), 303 get_reqd_work_group_size(mod, name), 304 0, 
offsets.at(name), 305 make_kernel_args(mod, name, c)); 306 } 307 308 b.secs.push_back(make_text_section(code)); 309 return b; 310} 311