1//
2// Copyright 2012 Francisco Jerez
3//
4// Permission is hereby granted, free of charge, to any person obtaining a
5// copy of this software and associated documentation files (the "Software"),
6// to deal in the Software without restriction, including without limitation
7// the rights to use, copy, modify, merge, publish, distribute, sublicense,
8// and/or sell copies of the Software, and to permit persons to whom the
9// Software is furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in
12// all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
18// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20// OTHER DEALINGS IN THE SOFTWARE.
21//
22
23#include "api/util.hpp"
24#include "core/kernel.hpp"
25#include "core/event.hpp"
26
27using namespace clover;
28
29CLOVER_API cl_kernel
30clCreateKernel(cl_program d_prog, const char *name, cl_int *r_errcode) try {
31   auto &prog = obj(d_prog);
32
33   if (!name)
34      throw error(CL_INVALID_VALUE);
35
36   auto &sym = find(name_equals(name), prog.symbols());
37
38   ret_error(r_errcode, CL_SUCCESS);
39   return new kernel(prog, name, range(sym.args));
40
41} catch (std::out_of_range &) {
42   ret_error(r_errcode, CL_INVALID_KERNEL_NAME);
43   return NULL;
44
45} catch (error &e) {
46   ret_error(r_errcode, e);
47   return NULL;
48}
49
50CLOVER_API cl_int
51clCreateKernelsInProgram(cl_program d_prog, cl_uint count,
52                         cl_kernel *rd_kerns, cl_uint *r_count) try {
53   auto &prog = obj(d_prog);
54   auto &syms = prog.symbols();
55
56   if (rd_kerns && count < syms.size())
57      throw error(CL_INVALID_VALUE);
58
59   if (rd_kerns)
60      copy(map([&](const binary::symbol &sym) {
61               return desc(new kernel(prog,
62                                      std::string(sym.name.begin(),
63                                                  sym.name.end()),
64                                      range(sym.args)));
65            }, syms),
66         rd_kerns);
67
68   if (r_count)
69      *r_count = syms.size();
70
71   return CL_SUCCESS;
72
73} catch (error &e) {
74   return e.get();
75}
76
77CLOVER_API cl_int
78clRetainKernel(cl_kernel d_kern) try {
79   obj(d_kern).retain();
80   return CL_SUCCESS;
81
82} catch (error &e) {
83   return e.get();
84}
85
86CLOVER_API cl_int
87clReleaseKernel(cl_kernel d_kern) try {
88   if (obj(d_kern).release())
89      delete pobj(d_kern);
90
91   return CL_SUCCESS;
92
93} catch (error &e) {
94   return e.get();
95}
96
97CLOVER_API cl_int
98clSetKernelArg(cl_kernel d_kern, cl_uint idx, size_t size,
99               const void *value) try {
100   obj(d_kern).args().at(idx).set(size, value);
101   return CL_SUCCESS;
102
103} catch (std::out_of_range &) {
104   return CL_INVALID_ARG_INDEX;
105
106} catch (error &e) {
107   return e.get();
108}
109
110CLOVER_API cl_int
111clGetKernelInfo(cl_kernel d_kern, cl_kernel_info param,
112                size_t size, void *r_buf, size_t *r_size) try {
113   property_buffer buf { r_buf, size, r_size };
114   auto &kern = obj(d_kern);
115
116   switch (param) {
117   case CL_KERNEL_FUNCTION_NAME:
118      buf.as_string() = kern.name();
119      break;
120
121   case CL_KERNEL_NUM_ARGS:
122      buf.as_scalar<cl_uint>() = kern.args().size();
123      break;
124
125   case CL_KERNEL_REFERENCE_COUNT:
126      buf.as_scalar<cl_uint>() = kern.ref_count();
127      break;
128
129   case CL_KERNEL_CONTEXT:
130      buf.as_scalar<cl_context>() = desc(kern.program().context());
131      break;
132
133   case CL_KERNEL_PROGRAM:
134      buf.as_scalar<cl_program>() = desc(kern.program());
135      break;
136
137   case CL_KERNEL_ATTRIBUTES:
138      buf.as_string() = find(name_equals(kern.name()), kern.program().symbols()).attributes;
139      break;
140
141   default:
142      throw error(CL_INVALID_VALUE);
143   }
144
145   return CL_SUCCESS;
146
147} catch (error &e) {
148   return e.get();
149}
150
151CLOVER_API cl_int
152clGetKernelWorkGroupInfo(cl_kernel d_kern, cl_device_id d_dev,
153                         cl_kernel_work_group_info param,
154                         size_t size, void *r_buf, size_t *r_size) try {
155   property_buffer buf { r_buf, size, r_size };
156   auto &kern = obj(d_kern);
157   auto &dev = (d_dev ? *pobj(d_dev) : unique(kern.program().devices()));
158
159   if (!count(dev, kern.program().devices()))
160      throw error(CL_INVALID_DEVICE);
161
162   switch (param) {
163   case CL_KERNEL_WORK_GROUP_SIZE:
164      buf.as_scalar<size_t>() = dev.max_threads_per_block();
165      break;
166
167   case CL_KERNEL_COMPILE_WORK_GROUP_SIZE:
168      buf.as_vector<size_t>() = kern.required_block_size();
169      break;
170
171   case CL_KERNEL_LOCAL_MEM_SIZE:
172      buf.as_scalar<cl_ulong>() = kern.mem_local();
173      break;
174
175   case CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE:
176      buf.as_scalar<size_t>() = dev.subgroup_size();
177      break;
178
179   case CL_KERNEL_PRIVATE_MEM_SIZE:
180      buf.as_scalar<cl_ulong>() = kern.mem_private();
181      break;
182
183   default:
184      throw error(CL_INVALID_VALUE);
185   }
186
187   return CL_SUCCESS;
188
189} catch (error &e) {
190   return e.get();
191
192} catch (std::out_of_range &) {
193   return CL_INVALID_DEVICE;
194}
195
196CLOVER_API cl_int
197clGetKernelArgInfo(cl_kernel d_kern,
198                   cl_uint idx, cl_kernel_arg_info param,
199                   size_t size, void *r_buf, size_t *r_size) try {
200   property_buffer buf { r_buf, size, r_size };
201
202   auto info = obj(d_kern).args_infos().at(idx);
203
204   if (info.arg_name.empty())
205      return CL_KERNEL_ARG_INFO_NOT_AVAILABLE;
206
207   switch (param) {
208   case CL_KERNEL_ARG_ADDRESS_QUALIFIER:
209      buf.as_scalar<cl_kernel_arg_address_qualifier>() = info.address_qualifier;
210      break;
211
212   case CL_KERNEL_ARG_ACCESS_QUALIFIER:
213      buf.as_scalar<cl_kernel_arg_access_qualifier>() = info.access_qualifier;
214      break;
215
216   case CL_KERNEL_ARG_TYPE_NAME:
217      buf.as_string() = info.type_name;
218      break;
219
220   case CL_KERNEL_ARG_TYPE_QUALIFIER:
221      buf.as_scalar<cl_kernel_arg_type_qualifier>() = info.type_qualifier;
222      break;
223
224   case CL_KERNEL_ARG_NAME:
225      buf.as_string() = info.arg_name;
226      break;
227
228   default:
229      throw error(CL_INVALID_VALUE);
230   }
231
232   return CL_SUCCESS;
233
234} catch (std::out_of_range &) {
235   return CL_INVALID_ARG_INDEX;
236
237} catch (error &e) {
238   return e.get();
239}
240
241namespace {
242   ///
243   /// Common argument checking shared by kernel invocation commands.
244   ///
245   void
246   validate_common(const command_queue &q, kernel &kern,
247                   const ref_vector<event> &deps) {
248      if (kern.program().context() != q.context() ||
249          any_of([&](const event &ev) {
250                return ev.context() != q.context();
251             }, deps))
252         throw error(CL_INVALID_CONTEXT);
253
254      if (any_of([](kernel::argument &arg) {
255               return !arg.set();
256            }, kern.args()))
257         throw error(CL_INVALID_KERNEL_ARGS);
258
259      // If the command queue's device is not associated to the program, we get
260      // a binary, with no sections, which will also fail the following test.
261      auto &b = kern.program().build(q.device()).bin;
262      if (!any_of(type_equals(binary::section::text_executable), b.secs))
263         throw error(CL_INVALID_PROGRAM_EXECUTABLE);
264   }
265
266   std::vector<size_t>
267   validate_grid_size(const command_queue &q, cl_uint dims,
268                      const size_t *d_grid_size) {
269      auto grid_size = range(d_grid_size, dims);
270
271      if (dims < 1 || dims > q.device().max_block_size().size())
272         throw error(CL_INVALID_WORK_DIMENSION);
273
274      return grid_size;
275   }
276
277   std::vector<size_t>
278   validate_grid_offset(const command_queue &q, cl_uint dims,
279                        const size_t *d_grid_offset) {
280      if (d_grid_offset)
281         return range(d_grid_offset, dims);
282      else
283         return std::vector<size_t>(dims, 0);
284   }
285
286   std::vector<size_t>
287   validate_block_size(const command_queue &q, const kernel &kern,
288                       cl_uint dims, const size_t *d_grid_size,
289                       const size_t *d_block_size) {
290      auto grid_size = range(d_grid_size, dims);
291
292      if (d_block_size) {
293         auto block_size = range(d_block_size, dims);
294
295         if (any_of(is_zero(), block_size) ||
296             any_of(greater(), block_size, q.device().max_block_size()))
297            throw error(CL_INVALID_WORK_ITEM_SIZE);
298
299         if (any_of(modulus(), grid_size, block_size))
300            throw error(CL_INVALID_WORK_GROUP_SIZE);
301
302         if (fold(multiplies(), 1u, block_size) >
303             q.device().max_threads_per_block())
304            throw error(CL_INVALID_WORK_GROUP_SIZE);
305
306         return block_size;
307
308      } else {
309         return kern.optimal_block_size(q, grid_size);
310      }
311   }
312}
313
314CLOVER_API cl_int
315clEnqueueNDRangeKernel(cl_command_queue d_q, cl_kernel d_kern,
316                       cl_uint dims, const size_t *d_grid_offset,
317                       const size_t *d_grid_size, const size_t *d_block_size,
318                       cl_uint num_deps, const cl_event *d_deps,
319                       cl_event *rd_ev) try {
320   auto &q = obj(d_q);
321   auto &kern = obj(d_kern);
322   auto deps = objs<wait_list_tag>(d_deps, num_deps);
323   auto grid_size = validate_grid_size(q, dims, d_grid_size);
324   auto grid_offset = validate_grid_offset(q, dims, d_grid_offset);
325   auto block_size = validate_block_size(q, kern, dims,
326                                         d_grid_size, d_block_size);
327
328   validate_common(q, kern, deps);
329
330   auto hev = create<hard_event>(
331      q, CL_COMMAND_NDRANGE_KERNEL, deps,
332      [=, &kern, &q](event &) {
333         kern.launch(q, grid_offset, grid_size, block_size);
334      });
335
336   ret_object(rd_ev, hev);
337   return CL_SUCCESS;
338
339} catch (error &e) {
340   return e.get();
341}
342
343CLOVER_API cl_int
344clEnqueueTask(cl_command_queue d_q, cl_kernel d_kern,
345              cl_uint num_deps, const cl_event *d_deps,
346              cl_event *rd_ev) try {
347   auto &q = obj(d_q);
348   auto &kern = obj(d_kern);
349   auto deps = objs<wait_list_tag>(d_deps, num_deps);
350
351   validate_common(q, kern, deps);
352
353   auto hev = create<hard_event>(
354      q, CL_COMMAND_TASK, deps,
355      [=, &kern, &q](event &) {
356         kern.launch(q, { 0 }, { 1 }, { 1 });
357      });
358
359   ret_object(rd_ev, hev);
360   return CL_SUCCESS;
361
362} catch (error &e) {
363   return e.get();
364}
365
366CLOVER_API cl_int
367clEnqueueNativeKernel(cl_command_queue d_q, void (*func)(void *),
368                      void *args, size_t args_size,
369                      cl_uint num_mems, const cl_mem *d_mems,
370                      const void **mem_handles, cl_uint num_deps,
371                      const cl_event *d_deps, cl_event *rd_ev) {
372   return CL_INVALID_OPERATION;
373}
374
375CLOVER_API cl_int
376clSetKernelArgSVMPointer(cl_kernel d_kern,
377                         cl_uint arg_index,
378                         const void *arg_value) try {
379  if (!any_of(std::mem_fn(&device::svm_support), obj(d_kern).program().devices()))
380      return CL_INVALID_OPERATION;
381   obj(d_kern).args().at(arg_index).set_svm(arg_value);
382   return CL_SUCCESS;
383
384} catch (std::out_of_range &) {
385   return CL_INVALID_ARG_INDEX;
386
387} catch (error &e) {
388   return e.get();
389}
390
391CLOVER_API cl_int
392clSetKernelExecInfo(cl_kernel d_kern,
393                    cl_kernel_exec_info param_name,
394                    size_t param_value_size,
395                    const void *param_value) try {
396
397   if (!any_of(std::mem_fn(&device::svm_support), obj(d_kern).program().devices()))
398      return CL_INVALID_OPERATION;
399
400   auto &kern = obj(d_kern);
401
402   const bool has_system_svm = all_of(std::mem_fn(&device::has_system_svm),
403                                      kern.program().context().devices());
404
405   if (!param_value)
406      return CL_INVALID_VALUE;
407
408   switch (param_name) {
409   case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM:
410   case CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM_ARM: {
411      if (param_value_size != sizeof(cl_bool))
412         return CL_INVALID_VALUE;
413
414      cl_bool val = *static_cast<const cl_bool*>(param_value);
415      if (val == CL_TRUE && !has_system_svm)
416         return CL_INVALID_OPERATION;
417      else
418         return CL_SUCCESS;
419   }
420
421   case CL_KERNEL_EXEC_INFO_SVM_PTRS:
422   case CL_KERNEL_EXEC_INFO_SVM_PTRS_ARM:
423      if (has_system_svm)
424         return CL_SUCCESS;
425
426      CLOVER_NOT_SUPPORTED_UNTIL("2.0");
427      return CL_INVALID_VALUE;
428
429   default:
430      return CL_INVALID_VALUE;
431   }
432
433} catch (error &e) {
434   return e.get();
435}
436