//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
21// 22 23#include "api/util.hpp" 24#include "core/kernel.hpp" 25#include "core/event.hpp" 26 27using namespace clover; 28 29CLOVER_API cl_kernel 30clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try { 31 auto &prog = obj(d_prog); 32 33 if (!name) 34 throw error(CL_INVALID_VALUE); 35 36 auto &sym = find(name_equals(name), prog.symbols()); 37 38 ret_error(r_errcode, CL_SUCCESS); 39 return new kernel(prog, name, range(sym.args)); 40 41} catch (std::out_of_range &) { 42 ret_error(r_errcode, CL_INVALID_KERNEL_NAME); 43 return NULL; 44 45} catch (error &e) { 46 ret_error(r_errcode, e); 47 return NULL; 48} 49 50CLOVER_API cl_int 51clCreateKernelsInProgram(cl_program d_prog, cl_uint count, 52 cl_kernel *rd_kerns, cl_uint *r_count) try { 53 auto &prog = obj(d_prog); 54 auto &syms = prog.symbols(); 55 56 if (rd_kerns && count < syms.size()) 57 throw error(CL_INVALID_VALUE); 58 59 if (rd_kerns) 60 copy(map([&](const binary::symbol &sym) { 61 return desc(new kernel(prog, 62 std::string(sym.name.begin(), 63 sym.name.end()), 64 range(sym.args))); 65 }, syms), 66 rd_kerns); 67 68 if (r_count) 69 *r_count = syms.size(); 70 71 return CL_SUCCESS; 72 73} catch (error &e) { 74 return e.get(); 75} 76 77CLOVER_API cl_int 78clRetainKernel(cl_kernel d_kern) try { 79 obj(d_kern).retain(); 80 return CL_SUCCESS; 81 82} catch (error &e) { 83 return e.get(); 84} 85 86CLOVER_API cl_int 87clReleaseKernel(cl_kernel d_kern) try { 88 if (obj(d_kern).release()) 89 delete pobj(d_kern); 90 91 return CL_SUCCESS; 92 93} catch (error &e) { 94 return e.get(); 95} 96 97CLOVER_API cl_int 98clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size, 99 const void *value) try { 100 obj(d_kern).args().at(idx).set(size, value); 101 return CL_SUCCESS; 102 103} catch (std::out_of_range &) { 104 return CL_INVALID_ARG_INDEX; 105 106} catch (error &e) { 107 return e.get(); 108} 109 110CLOVER_API cl_int 111clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param, 112 size_t size, void *r_buf, 
size_t *r_size) try { 113 property_buffer buf { r_buf, size, r_size }; 114 auto &kern = obj(d_kern); 115 116 switch (param) { 117 case CL_KERNEL_FUNCTION_NAME: 118 buf.as_string() = kern.name(); 119 break; 120 121 case CL_KERNEL_NUM_ARGS: 122 buf.as_scalar<cl_uint>() = kern.args().size(); 123 break; 124 125 case CL_KERNEL_REFERENCE_COUNT: 126 buf.as_scalar<cl_uint>() = kern.ref_count(); 127 break; 128 129 case CL_KERNEL_CONTEXT: 130 buf.as_scalar<cl_context>() = desc(kern.program().context()); 131 break; 132 133 case CL_KERNEL_PROGRAM: 134 buf.as_scalar<cl_program>() = desc(kern.program()); 135 break; 136 137 case CL_KERNEL_ATTRIBUTES: 138 buf.as_string() = find(name_equals(kern.name()), kern.program().symbols()).attributes; 139 break; 140 141 default: 142 throw error(CL_INVALID_VALUE); 143 } 144 145 return CL_SUCCESS; 146 147} catch (error &e) { 148 return e.get(); 149} 150 151CLOVER_API cl_int 152clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev, 153 cl_kernel_work_group_info param, 154 size_t size, void *r_buf, size_t *r_size) try { 155 property_buffer buf { r_buf, size, r_size }; 156 auto &kern = obj(d_kern); 157 auto &dev = (d_dev ? 
*pobj(d_dev) : unique(kern.program().devices())); 158 159 if (!count(dev, kern.program().devices())) 160 throw error(CL_INVALID_DEVICE); 161 162 switch (param) { 163 case CL_KERNEL_WORK_GROUP_SIZE: 164 buf.as_scalar<size_t>() = dev.max_threads_per_block(); 165 break; 166 167 case CL_KERNEL_COMPILE_WORK_GROUP_SIZE: 168 buf.as_vector<size_t>() = kern.required_block_size(); 169 break; 170 171 case CL_KERNEL_LOCAL_MEM_SIZE: 172 buf.as_scalar<cl_ulong>() = kern.mem_local(); 173 break; 174 175 case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE: 176 buf.as_scalar<size_t>() = dev.subgroup_size(); 177 break; 178 179 case CL_KERNEL_PRIVATE_MEM_SIZE: 180 buf.as_scalar<cl_ulong>() = kern.mem_private(); 181 break; 182 183 default: 184 throw error(CL_INVALID_VALUE); 185 } 186 187 return CL_SUCCESS; 188 189} catch (error &e) { 190 return e.get(); 191 192} catch (std::out_of_range &) { 193 return CL_INVALID_DEVICE; 194} 195 196CLOVER_API cl_int 197clGetKernelArgInfo(cl_kernel d_kern, 198 cl_uint idx, cl_kernel_arg_info param, 199 size_t size, void *r_buf, size_t *r_size) try { 200 property_buffer buf { r_buf, size, r_size }; 201 202 auto info = obj(d_kern).args_infos().at(idx); 203 204 if (info.arg_name.empty()) 205 return CL_KERNEL_ARG_INFO_NOT_AVAILABLE; 206 207 switch (param) { 208 case CL_KERNEL_ARG_ADDRESS_QUALIFIER: 209 buf.as_scalar<cl_kernel_arg_address_qualifier>() = info.address_qualifier; 210 break; 211 212 case CL_KERNEL_ARG_ACCESS_QUALIFIER: 213 buf.as_scalar<cl_kernel_arg_access_qualifier>() = info.access_qualifier; 214 break; 215 216 case CL_KERNEL_ARG_TYPE_NAME: 217 buf.as_string() = info.type_name; 218 break; 219 220 case CL_KERNEL_ARG_TYPE_QUALIFIER: 221 buf.as_scalar<cl_kernel_arg_type_qualifier>() = info.type_qualifier; 222 break; 223 224 case CL_KERNEL_ARG_NAME: 225 buf.as_string() = info.arg_name; 226 break; 227 228 default: 229 throw error(CL_INVALID_VALUE); 230 } 231 232 return CL_SUCCESS; 233 234} catch (std::out_of_range &) { 235 return 
CL_INVALID_ARG_INDEX; 236 237} catch (error &e) { 238 return e.get(); 239} 240 241namespace { 242 /// 243 /// Common argument checking shared by kernel invocation commands. 244 /// 245 void 246 validate_common(const command_queue &q, kernel &kern, 247 const ref_vector<event> &deps) { 248 if (kern.program().context() != q.context() || 249 any_of([&](const event &ev) { 250 return ev.context() != q.context(); 251 }, deps)) 252 throw error(CL_INVALID_CONTEXT); 253 254 if (any_of([](kernel::argument &arg) { 255 return !arg.set(); 256 }, kern.args())) 257 throw error(CL_INVALID_KERNEL_ARGS); 258 259 // If the command queue's device is not associated to the program, we get 260 // a binary, with no sections, which will also fail the following test. 261 auto &b = kern.program().build(q.device()).bin; 262 if (!any_of(type_equals(binary::section::text_executable), b.secs)) 263 throw error(CL_INVALID_PROGRAM_EXECUTABLE); 264 } 265 266 std::vector<size_t> 267 validate_grid_size(const command_queue &q, cl_uint dims, 268 const size_t *d_grid_size) { 269 auto grid_size = range(d_grid_size, dims); 270 271 if (dims < 1 || dims > q.device().max_block_size().size()) 272 throw error(CL_INVALID_WORK_DIMENSION); 273 274 return grid_size; 275 } 276 277 std::vector<size_t> 278 validate_grid_offset(const command_queue &q, cl_uint dims, 279 const size_t *d_grid_offset) { 280 if (d_grid_offset) 281 return range(d_grid_offset, dims); 282 else 283 return std::vector<size_t>(dims, 0); 284 } 285 286 std::vector<size_t> 287 validate_block_size(const command_queue &q, const kernel &kern, 288 cl_uint dims, const size_t *d_grid_size, 289 const size_t *d_block_size) { 290 auto grid_size = range(d_grid_size, dims); 291 292 if (d_block_size) { 293 auto block_size = range(d_block_size, dims); 294 295 if (any_of(is_zero(), block_size) || 296 any_of(greater(), block_size, q.device().max_block_size())) 297 throw error(CL_INVALID_WORK_ITEM_SIZE); 298 299 if (any_of(modulus(), grid_size, block_size)) 300 
throw error(CL_INVALID_WORK_GROUP_SIZE); 301 302 if (fold(multiplies(), 1u, block_size) > 303 q.device().max_threads_per_block()) 304 throw error(CL_INVALID_WORK_GROUP_SIZE); 305 306 return block_size; 307 308 } else { 309 return kern.optimal_block_size(q, grid_size); 310 } 311 } 312} 313 314CLOVER_API cl_int 315clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern, 316 cl_uint dims, const size_t *d_grid_offset, 317 const size_t *d_grid_size, const size_t *d_block_size, 318 cl_uint num_deps, const cl_event *d_deps, 319 cl_event *rd_ev) try { 320 auto &q = obj(d_q); 321 auto &kern = obj(d_kern); 322 auto deps = objs<wait_list_tag>(d_deps, num_deps); 323 auto grid_size = validate_grid_size(q, dims, d_grid_size); 324 auto grid_offset = validate_grid_offset(q, dims, d_grid_offset); 325 auto block_size = validate_block_size(q, kern, dims, 326 d_grid_size, d_block_size); 327 328 validate_common(q, kern, deps); 329 330 auto hev = create<hard_event>( 331 q, CL_COMMAND_NDRANGE_KERNEL, deps, 332 [=, &kern, &q](event &) { 333 kern.launch(q, grid_offset, grid_size, block_size); 334 }); 335 336 ret_object(rd_ev, hev); 337 return CL_SUCCESS; 338 339} catch (error &e) { 340 return e.get(); 341} 342 343CLOVER_API cl_int 344clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern, 345 cl_uint num_deps, const cl_event *d_deps, 346 cl_event *rd_ev) try { 347 auto &q = obj(d_q); 348 auto &kern = obj(d_kern); 349 auto deps = objs<wait_list_tag>(d_deps, num_deps); 350 351 validate_common(q, kern, deps); 352 353 auto hev = create<hard_event>( 354 q, CL_COMMAND_TASK, deps, 355 [=, &kern, &q](event &) { 356 kern.launch(q, { 0 }, { 1 }, { 1 }); 357 }); 358 359 ret_object(rd_ev, hev); 360 return CL_SUCCESS; 361 362} catch (error &e) { 363 return e.get(); 364} 365 366CLOVER_API cl_int 367clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *), 368 void *args, size_t args_size, 369 cl_uint num_mems, const cl_mem *d_mems, 370 const void **mem_handles, cl_uint num_deps, 371 const 
cl_event *d_deps, cl_event *rd_ev) { 372 return CL_INVALID_OPERATION; 373} 374 375CLOVER_API cl_int 376clSetKernelArgSVMPointer(cl_kernel d_kern, 377 cl_uint arg_index, 378 const void *arg_value) try { 379 if (!any_of(std::mem_fn(&device::svm_support), obj(d_kern).program().devices())) 380 return CL_INVALID_OPERATION; 381 obj(d_kern).args().at(arg_index).set_svm(arg_value); 382 return CL_SUCCESS; 383 384} catch (std::out_of_range &) { 385 return CL_INVALID_ARG_INDEX; 386 387} catch (error &e) { 388 return e.get(); 389} 390 391CLOVER_API cl_int 392clSetKernelExecInfo(cl_kernel d_kern, 393 cl_kernel_exec_info param_name, 394 size_t param_value_size, 395 const void *param_value) try { 396 397 if (!any_of(std::mem_fn(&device::svm_support), obj(d_kern).program().devices())) 398 return CL_INVALID_OPERATION; 399 400 auto &kern = obj(d_kern); 401 402 const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm), 403 kern.program().context().devices()); 404 405 if (!param_value) 406 return CL_INVALID_VALUE; 407 408 switch (param_name) { 409 case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM: 410 case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM: { 411 if (param_value_size != sizeof(cl_bool)) 412 return CL_INVALID_VALUE; 413 414 cl_bool val = *static_cast<const cl_bool*>(param_value); 415 if (val == CL_TRUE && !has_system_svm) 416 return CL_INVALID_OPERATION; 417 else 418 return CL_SUCCESS; 419 } 420 421 case CL_KERNEL_EXEC_INFO_SVM_PTRS: 422 case CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM: 423 if (has_system_svm) 424 return CL_SUCCESS; 425 426 CLOVER_NOT_SUPPORTED_UNTIL("2.0"); 427 return CL_INVALID_VALUE; 428 429 default: 430 return CL_INVALID_VALUE; 431 } 432 433} catch (error &e) { 434 return e.get(); 435} 436