//
// Copyright 2012 Francisco Jerez
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the "Software"),
// to deal in the Software without restriction, including without limitation
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
// and/or sell copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
//

#include <cstring>

#include "util/bitscan.h"

#include "api/dispatch.hpp"
#include "api/util.hpp"
#include "core/event.hpp"
#include "core/memory.hpp"

using namespace clover;

namespace {
   typedef resource::vector vector_t;

   vector_t
   vector(const size_t *p) {
      if (!p)
         throw error(CL_INVALID_VALUE);
      return range(p, 3);
   }

   vector_t
   pitch(const vector_t &region, vector_t pitch) {
      for (auto x : zip(tail(pitch),
                        map(multiplies(), region, pitch))) {
         // The spec defines a value of zero as the natural pitch,
         // i.e. the unaligned size of the previous dimension.
         if (std::get<0>(x) == 0)
            std::get<0>(x) = std::get<1>(x);
      }

      return pitch;
   }

   ///
   /// Size of a region in bytes.
   ///
   size_t
   size(const vector_t &pitch, const vector_t &region) {
      if (any_of(is_zero(), region))
         return 0;
      else
         return dot(pitch, region - vector_t{ 0, 1, 1 });
   }

   ///
   /// Common argument checking shared by memory transfer commands.
   ///
   void
   validate_common(command_queue &q,
                   const ref_vector<event> &deps) {
      if (any_of([&](const event &ev) {
               return ev.context() != q.context();
            }, deps))
         throw error(CL_INVALID_CONTEXT);
   }

   ///
   /// Common error checking for a buffer object argument.
   ///
   void
   validate_object(command_queue &q, buffer &mem, const vector_t &origin,
                   const vector_t &pitch, const vector_t &region) {
      if (mem.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      // The region must fit within the specified pitch,
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);

      // ...and within the specified object.
      if (dot(pitch, origin) + size(pitch, region) > mem.size())
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);
   }
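
   //
   // Illustration (not part of the original file): how the pitch() and
   // size() helpers above compose, assuming vector_t behaves like a
   // 3-component array of size_t.
   //
   //    vector_t region = { 16, 4, 2 };           // 16x4x2 elements
   //    vector_t p = pitch(region, {{ 1, 0, 0 }});
   //    // Zero pitches expand to the natural pitch of the previous
   //    // dimension: p == {{ 1, 16, 64 }}.
   //    size_t n = size(p, region);
   //    // dot({1, 16, 64}, {16, 3, 1}) == 16 + 48 + 64 == 128 bytes,
   //    // i.e. one past the last byte of the region.
   //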

   ///
   /// Common error checking for an image argument.
   ///
   void
   validate_object(command_queue &q, image &img,
                   const vector_t &orig, const vector_t &region) {
      size_t height = img.type() == CL_MEM_OBJECT_IMAGE1D_ARRAY ?
         img.array_size() : img.height();
      size_t depth = img.type() == CL_MEM_OBJECT_IMAGE2D_ARRAY ?
         img.array_size() : img.depth();
      vector_t size = { img.width(), height, depth };
      const auto &dev = q.device();

      if (!dev.image_support())
         throw error(CL_INVALID_OPERATION);

      if (img.context() != q.context())
         throw error(CL_INVALID_CONTEXT);

      if (any_of(greater(), orig + region, size))
         throw error(CL_INVALID_VALUE);

      if (any_of(is_zero(), region))
         throw error(CL_INVALID_VALUE);

      switch (img.type()) {
      case CL_MEM_OBJECT_IMAGE1D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE1D_ARRAY: {
         const size_t max_size = dev.max_image_size();
         const size_t max_array = dev.max_image_array_number();
         if (img.width() > max_size || img.array_size() > max_array)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE2D: {
         const size_t max = dev.max_image_size();
         if (img.width() > max || img.height() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE2D_ARRAY: {
         const size_t max_size = dev.max_image_size();
         const size_t max_array = dev.max_image_array_number();
         if (img.width() > max_size || img.height() > max_size ||
             img.array_size() > max_array)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      case CL_MEM_OBJECT_IMAGE3D: {
         const size_t max = dev.max_image_size_3d();
         if (img.width() > max || img.height() > max || img.depth() > max)
            throw error(CL_INVALID_IMAGE_SIZE);
         break;
      }
      // XXX: Implement missing checks once Clover supports more image types.
      default:
         throw error(CL_INVALID_IMAGE_SIZE);
      }
   }

   ///
   /// Common error checking for a host pointer argument.
   ///
   void
   validate_object(command_queue &q, const void *ptr, const vector_t &orig,
                   const vector_t &pitch, const vector_t &region) {
      if (!ptr)
         throw error(CL_INVALID_VALUE);

      // The region must fit within the specified pitch.
      if (any_of(greater(), map(multiplies(), pitch, region), tail(pitch)))
         throw error(CL_INVALID_VALUE);
   }

   ///
   /// Common argument checking for a copy between two buffer objects.
   ///
   void
   validate_copy(command_queue &q, buffer &dst_mem,
                 const vector_t &dst_orig, const vector_t &dst_pitch,
                 buffer &src_mem,
                 const vector_t &src_orig, const vector_t &src_pitch,
                 const vector_t &region) {
      if (dst_mem == src_mem) {
         auto dst_offset = dot(dst_pitch, dst_orig);
         auto src_offset = dot(src_pitch, src_orig);

         if (interval_overlaps()(
                dst_offset, dst_offset + size(dst_pitch, region),
                src_offset, src_offset + size(src_pitch, region)))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }
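
   //
   // Illustration (hypothetical values, not part of the original file):
   // for a copy within a single buffer the check above reduces to 1-D
   // interval overlap on byte offsets.  E.g. an 8-byte copy from offset
   // 0 to offset 4 of the same buffer gives the intervals [0, 8) and
   // [4, 12), which overlap, so CL_MEM_COPY_OVERLAP is thrown; offsets
   // 0 and 8 would be accepted.
   //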

   ///
   /// Common argument checking for a copy between two image objects.
   ///
   void
   validate_copy(command_queue &q,
                 image &dst_img, const vector_t &dst_orig,
                 image &src_img, const vector_t &src_orig,
                 const vector_t &region) {
      if (dst_img.format() != src_img.format())
         throw error(CL_IMAGE_FORMAT_MISMATCH);

      if (dst_img == src_img) {
         if (all_of(interval_overlaps(),
                    dst_orig, dst_orig + region,
                    src_orig, src_orig + region))
            throw error(CL_MEM_COPY_OVERLAP);
      }
   }

   ///
   /// Checks that the host access flags of the memory object are
   /// within the allowed set \a flags.
   ///
   void
   validate_object_access(const memory_obj &mem, const cl_mem_flags flags) {
      if (mem.flags() & ~flags &
          (CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_WRITE_ONLY |
           CL_MEM_HOST_NO_ACCESS))
         throw error(CL_INVALID_OPERATION);
   }

   ///
   /// Checks that the mapping flags are correct.
   ///
   void
   validate_map_flags(const memory_obj &mem, const cl_map_flags flags) {
      if ((flags & (CL_MAP_WRITE | CL_MAP_READ)) &&
          (flags & CL_MAP_WRITE_INVALIDATE_REGION))
         throw error(CL_INVALID_VALUE);

      if (flags & CL_MAP_READ)
         validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

      if (flags & (CL_MAP_WRITE | CL_MAP_WRITE_INVALIDATE_REGION))
         validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);
   }

   ///
   /// Checks that the memory migration flags are correct.
   ///
   void
   validate_mem_migration_flags(const cl_mem_migration_flags flags) {
      const cl_mem_migration_flags valid =
         CL_MIGRATE_MEM_OBJECT_HOST |
         CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED;

      if (flags & ~valid)
         throw error(CL_INVALID_VALUE);
   }
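
   //
   // Illustration (not part of the original file): the map flag rules
   // above reject contradictory combinations such as
   // CL_MAP_READ | CL_MAP_WRITE_INVALIDATE_REGION, and translate the
   // remaining bits into host access checks:
   //
   //    validate_map_flags(mem, CL_MAP_READ);
   //    // throws CL_INVALID_OPERATION if mem was created with
   //    // CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS.
   //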

   ///
   /// Class that encapsulates the task of mapping an object of type
   /// \a T.  Instances are implicitly convertible to \a void *.
   ///
   template<typename T>
   struct _map;

   template<>
   struct _map<image*> {
      _map(command_queue &q, image *img, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, img->resource_in(q), flags, true, offset, region),
         pitch(map.pitch())
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<>
   struct _map<buffer*> {
      _map(command_queue &q, buffer *mem, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         map(q, mem->resource_in(q), flags, true,
             {{ dot(pitch, offset) }}, {{ size(pitch, region) }}),
         pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(map);
      }

      mapping map;
      vector_t pitch;
   };

   template<typename P>
   struct _map<P *> {
      _map(command_queue &q, P *ptr, cl_map_flags flags,
           vector_t offset, vector_t pitch, vector_t region) :
         ptr((P *)((char *)ptr + dot(pitch, offset))), pitch(pitch)
      { }

      template<typename T>
      operator T *() const {
         return static_cast<T *>(ptr);
      }

      P *ptr;
      vector_t pitch;
   };

   ///
   /// Software copy from \a src_obj to \a dst_obj.  They can be
   /// either pointers or memory objects.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   soft_copy_op(command_queue &q,
                T dst_obj, const vector_t &dst_orig, const vector_t &dst_pitch,
                S src_obj, const vector_t &src_orig, const vector_t &src_pitch,
                const vector_t &region) {
      return [=, &q](event &) {
         _map<T> dst = { q, dst_obj, CL_MAP_WRITE,
                         dst_orig, dst_pitch, region };
         _map<S> src = { q, src_obj, CL_MAP_READ,
                         src_orig, src_pitch, region };
         assert(src.pitch[0] == dst.pitch[0]);
         vector_t v = {};

         for (v[2] = 0; v[2] < region[2]; ++v[2]) {
            for (v[1] = 0; v[1] < region[1]; ++v[1]) {
               std::memcpy(
                  static_cast<char *>(dst) + dot(dst.pitch, v),
                  static_cast<const char *>(src) + dot(src.pitch, v),
                  src.pitch[0] * region[0]);
            }
         }
      };
   }
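
   //
   // Illustration (not part of the original file): with v = { 0, y, z }
   // the memcpy above starts each row at dot(pitch, v) ==
   // y * row_pitch + z * slice_pitch bytes from the mapping origin and
   // copies region[0] * pitch[0] bytes, i.e. one unpadded row, so any
   // padding between rows or slices is skipped rather than copied.
   //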

   ///
   /// Hardware copy from \a src_obj to \a dst_obj.
   ///
   template<typename T, typename S>
   std::function<void (event &)>
   hard_copy_op(command_queue &q, T dst_obj, const vector_t &dst_orig,
                S src_obj, const vector_t &src_orig, const vector_t &region) {
      return [=, &q](event &) {
         dst_obj->resource_in(q).copy(q, dst_orig, region,
                                      src_obj->resource_in(q), src_orig);
      };
   }
}

CLOVER_API cl_int
clEnqueueReadBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    size_t offset, size_t size, void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER, deps,
      soft_copy_op(q, ptr, {}, obj_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                     size_t offset, size_t size, const void *ptr,
                     cl_uint num_deps, const cl_event *d_deps,
                     cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, {}, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, {}, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
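
//
// Illustration (hypothetical host-side usage, not part of the original
// file): a blocking read maps to a soft_copy_op with a 1-D region
// { size, 1, 1 } and unit pitch.
//
//    cl_int err = clEnqueueReadBuffer(queue, buf, CL_TRUE, 0, 64, host_ptr,
//                                     0, NULL, NULL);
//    // err == CL_INVALID_OPERATION if buf was created with
//    // CL_MEM_HOST_WRITE_ONLY or CL_MEM_HOST_NO_ACCESS.
//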

CLOVER_API cl_int
clEnqueueReadBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                        const size_t *p_obj_origin,
                        const size_t *p_host_origin,
                        const size_t *p_region,
                        size_t obj_row_pitch, size_t obj_slice_pitch,
                        size_t host_row_pitch, size_t host_slice_pitch,
                        void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_BUFFER_RECT, deps,
      soft_copy_op(q, ptr, host_origin, host_pitch,
                   &mem, obj_origin, obj_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueWriteBufferRect(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                         const size_t *p_obj_origin,
                         const size_t *p_host_origin,
                         const size_t *p_region,
                         size_t obj_row_pitch, size_t obj_slice_pitch,
                         size_t host_row_pitch, size_t host_slice_pitch,
                         const void *ptr,
                         cl_uint num_deps, const cl_event *d_deps,
                         cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto obj_origin = vector(p_obj_origin);
   auto obj_pitch = pitch(region, {{ 1, obj_row_pitch, obj_slice_pitch }});
   auto host_origin = vector(p_host_origin);
   auto host_pitch = pitch(region, {{ 1, host_row_pitch, host_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_object(q, ptr, host_origin, host_pitch, region);
   validate_object_access(mem, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_BUFFER_RECT, deps,
      soft_copy_op(q, &mem, obj_origin, obj_pitch,
                   ptr, host_origin, host_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillBuffer(cl_command_queue d_queue, cl_mem d_mem,
                    const void *pattern, size_t pattern_size,
                    size_t offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t origin = { offset };
   auto dst_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, origin, dst_pitch, region);

   if (!pattern)
      return CL_INVALID_VALUE;

   if (!util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 || size % pattern_size ||
       offset % pattern_size) {
      return CL_INVALID_VALUE;
   }

   auto sub = dynamic_cast<sub_buffer *>(&mem);
   if (sub && sub->offset() % q.device().mem_base_addr_align()) {
      return CL_MISALIGNED_SUB_BUFFER_OFFSET;
   }

   std::string data = std::string((char *)pattern, pattern_size);
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_BUFFER, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
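
//
// Illustration (hypothetical values, not part of the original file):
// the pattern checks in clEnqueueFillBuffer above require a
// power-of-two pattern_size of at most 128 bytes that divides both
// offset and size.
//
//    cl_float value = 0.0f;
//    clEnqueueFillBuffer(queue, buf, &value, sizeof(value), /*offset*/ 0,
//                        /*size*/ 256, 0, NULL, NULL);
//    // A pattern_size of 3, or offset == 2 with pattern_size == 4,
//    // would fail with CL_INVALID_VALUE.
//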

CLOVER_API cl_int
clEnqueueCopyBuffer(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                    size_t src_offset, size_t dst_offset, size_t size,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ 1 }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER, deps,
      hard_copy_op(q, &dst_mem, dst_origin,
                   &src_mem, src_origin, region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferRect(cl_command_queue d_q, cl_mem d_src_mem,
                        cl_mem d_dst_mem,
                        const size_t *p_src_origin, const size_t *p_dst_origin,
                        const size_t *p_region,
                        size_t src_row_pitch, size_t src_slice_pitch,
                        size_t dst_row_pitch, size_t dst_slice_pitch,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ 1, dst_row_pitch, dst_slice_pitch }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ 1, src_row_pitch, src_slice_pitch }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);
   validate_copy(q, dst_mem, dst_origin, dst_pitch,
                 src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_RECT, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueReadImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   const size_t *p_origin, const size_t *p_region,
                   size_t row_pitch, size_t slice_pitch, void *ptr,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});
   auto src_origin = vector(p_origin);
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, ptr, {}, dst_pitch, region);
   validate_object(q, img, src_origin, region);
   validate_object_access(img, CL_MEM_HOST_READ_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_READ_IMAGE, deps,
      soft_copy_op(q, ptr, {}, dst_pitch,
                   &img, src_origin, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
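
//
// Illustration (hypothetical values, not part of the original file):
// passing row_pitch == 0 and slice_pitch == 0 to clEnqueueReadImage
// lets pitch() derive the tight host layout from the region and the
// pixel size.
//
//    size_t origin[3] = { 0, 0, 0 }, region[3] = { 64, 64, 1 };
//    clEnqueueReadImage(queue, rgba_image, CL_TRUE, origin, region,
//                       0, 0, host_ptr, 0, NULL, NULL);
//    // For a 4-byte/pixel format the host pitches become
//    // {{ 4, 256, 16384 }}.
//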

CLOVER_API cl_int
clEnqueueWriteImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                    const size_t *p_origin, const size_t *p_region,
                    size_t row_pitch, size_t slice_pitch, const void *ptr,
                    cl_uint num_deps, const cl_event *d_deps,
                    cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_origin);
   auto dst_pitch = pitch(region, {{ img.pixel_size(),
                                     img.row_pitch(), img.slice_pitch() }});
   auto src_pitch = pitch(region, {{ img.pixel_size(),
                                     row_pitch, slice_pitch }});

   validate_common(q, deps);
   validate_object(q, img, dst_origin, region);
   validate_object(q, ptr, {}, src_pitch, region);
   validate_object_access(img, CL_MEM_HOST_WRITE_ONLY);

   auto hev = create<hard_event>(
      q, CL_COMMAND_WRITE_IMAGE, deps,
      soft_copy_op(q, &img, dst_origin, dst_pitch,
                   ptr, {}, src_pitch,
                   region));

   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueFillImage(cl_command_queue d_queue, cl_mem d_mem,
                   const void *fill_color,
                   const size_t *p_origin, const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_queue);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto origin = vector(p_origin);
   auto region = vector(p_region);

   validate_common(q, deps);
   validate_object(q, img, origin, region);

   if (!fill_color)
      return CL_INVALID_VALUE;

   std::string data = std::string((char *)fill_color, sizeof(cl_uint4));
   auto hev = create<hard_event>(
      q, CL_COMMAND_FILL_IMAGE, deps,
      [=, &q, &img](event &) {
         img.resource_in(q).clear(q, origin, region, data);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyImage(cl_command_queue d_q, cl_mem d_src_mem, cl_mem d_dst_mem,
                   const size_t *p_src_origin, const size_t *p_dst_origin,
                   const size_t *p_region,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto src_origin = vector(p_src_origin);

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_img, src_origin, region);
   validate_copy(q, dst_img, dst_origin, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE, deps,
      hard_copy_op(q, &dst_img, dst_origin,
                   &src_img, src_origin,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
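
//
// Illustration (hypothetical values, not part of the original file):
// copying an image onto itself is rejected only if the source and
// destination regions overlap in every dimension.
//
//    size_t src[3] = { 0, 0, 0 }, dst[3] = { 32, 0, 0 };
//    size_t region[3] = { 32, 32, 1 };
//    clEnqueueCopyImage(queue, img, img, src, dst, region, 0, NULL, NULL);
//    // Succeeds for a sufficiently large img: the x intervals [0, 32)
//    // and [32, 64) are disjoint, so all_of(interval_overlaps(), ...)
//    // is false.  dst[0] == 16 would fail with CL_MEM_COPY_OVERLAP.
//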

CLOVER_API cl_int
clEnqueueCopyImageToBuffer(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           const size_t *p_src_origin, const size_t *p_region,
                           size_t dst_offset,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_img = obj<image>(d_src_mem);
   auto &dst_mem = obj<buffer>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   vector_t dst_origin = { dst_offset };
   auto dst_pitch = pitch(region, {{ src_img.pixel_size() }});
   auto src_origin = vector(p_src_origin);
   auto src_pitch = pitch(region, {{ src_img.pixel_size(),
                                     src_img.row_pitch(),
                                     src_img.slice_pitch() }});

   validate_common(q, deps);
   validate_object(q, dst_mem, dst_origin, dst_pitch, region);
   validate_object(q, src_img, src_origin, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_IMAGE_TO_BUFFER, deps,
      soft_copy_op(q, &dst_mem, dst_origin, dst_pitch,
                   &src_img, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueCopyBufferToImage(cl_command_queue d_q,
                           cl_mem d_src_mem, cl_mem d_dst_mem,
                           size_t src_offset,
                           const size_t *p_dst_origin, const size_t *p_region,
                           cl_uint num_deps, const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &src_mem = obj<buffer>(d_src_mem);
   auto &dst_img = obj<image>(d_dst_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto dst_origin = vector(p_dst_origin);
   auto dst_pitch = pitch(region, {{ dst_img.pixel_size(),
                                     dst_img.row_pitch(),
                                     dst_img.slice_pitch() }});
   vector_t src_origin = { src_offset };
   auto src_pitch = pitch(region, {{ dst_img.pixel_size() }});

   validate_common(q, deps);
   validate_object(q, dst_img, dst_origin, region);
   validate_object(q, src_mem, src_origin, src_pitch, region);

   auto hev = create<hard_event>(
      q, CL_COMMAND_COPY_BUFFER_TO_IMAGE, deps,
      soft_copy_op(q, &dst_img, dst_origin, dst_pitch,
                   &src_mem, src_origin, src_pitch,
                   region));

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API void *
clEnqueueMapBuffer(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                   cl_map_flags flags, size_t offset, size_t size,
                   cl_uint num_deps, const cl_event *d_deps,
                   cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &mem = obj<buffer>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   vector_t region = { size, 1, 1 };
   vector_t obj_origin = { offset };
   auto obj_pitch = pitch(region, {{ 1 }});

   validate_common(q, deps);
   validate_object(q, mem, obj_origin, obj_pitch, region);
   validate_map_flags(mem, flags);

   auto *map = mem.resource_in(q).add_map(q, flags, blocking,
                                          obj_origin, region);

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_BUFFER, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}
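
//
// Illustration (hypothetical usage, not part of the original file):
// a mapping obtained above stays valid until it is released with
// clEnqueueUnmapMemObject on the same pointer.
//
//    cl_int err;
//    void *p = clEnqueueMapBuffer(queue, buf, CL_TRUE, CL_MAP_WRITE,
//                                 0, 64, 0, NULL, NULL, &err);
//    // ... write up to 64 bytes through p ...
//    clEnqueueUnmapMemObject(queue, buf, p, 0, NULL, NULL);
//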

CLOVER_API void *
clEnqueueMapImage(cl_command_queue d_q, cl_mem d_mem, cl_bool blocking,
                  cl_map_flags flags,
                  const size_t *p_origin, const size_t *p_region,
                  size_t *row_pitch, size_t *slice_pitch,
                  cl_uint num_deps, const cl_event *d_deps,
                  cl_event *rd_ev, cl_int *r_errcode) try {
   auto &q = obj(d_q);
   auto &img = obj<image>(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);
   auto region = vector(p_region);
   auto origin = vector(p_origin);

   validate_common(q, deps);
   validate_object(q, img, origin, region);
   validate_map_flags(img, flags);

   if (!row_pitch)
      throw error(CL_INVALID_VALUE);

   if ((img.slice_pitch() || img.array_size()) && !slice_pitch)
      throw error(CL_INVALID_VALUE);

   auto *map = img.resource_in(q).add_map(q, flags, blocking, origin, region);
   *row_pitch = map->pitch()[1];
   if (slice_pitch)
      *slice_pitch = map->pitch()[2];

   auto hev = create<hard_event>(q, CL_COMMAND_MAP_IMAGE, deps);
   if (blocking)
      hev().wait_signalled();

   ret_object(rd_ev, hev);
   ret_error(r_errcode, CL_SUCCESS);
   return *map;

} catch (error &e) {
   ret_error(r_errcode, e);
   return NULL;
}

CLOVER_API cl_int
clEnqueueUnmapMemObject(cl_command_queue d_q, cl_mem d_mem, void *ptr,
                        cl_uint num_deps, const cl_event *d_deps,
                        cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto &mem = obj(d_mem);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);

   auto hev = create<hard_event>(
      q, CL_COMMAND_UNMAP_MEM_OBJECT, deps,
      [=, &q, &mem](event &) {
         mem.resource_in(q).del_map(ptr);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueMigrateMemObjects(cl_command_queue d_q,
                           cl_uint num_mems,
                           const cl_mem *d_mems,
                           cl_mem_migration_flags flags,
                           cl_uint num_deps,
                           const cl_event *d_deps,
                           cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto mems = objs<memory_obj>(d_mems, num_mems);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (any_of([&](const memory_obj &m) {
            return m.context() != q.context();
         }, mems))
      throw error(CL_INVALID_CONTEXT);

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         for (auto &mem: mems) {
            if (flags & CL_MIGRATE_MEM_OBJECT_HOST) {
               if ((flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED))
                  mem.resource_out(q);

               // To handle flags == CL_MIGRATE_MEM_OBJECT_HOST
               // efficiently we would need cl*ReadBuffer* to implement
               // reading from host memory.

            } else {
               if (flags & CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED)
                  mem.resource_undef(q);
               else
                  mem.resource_in(q);
            }
         }
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}

cl_int
clover::EnqueueSVMFree(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       void *svm_pointers[],
                       void (CL_CALLBACK *pfn_free_func) (
                          cl_command_queue queue, cl_uint num_svm_pointers,
                          void *svm_pointers[], void *user_data),
                       void *user_data,
                       cl_uint num_events_in_wait_list,
                       const cl_event *event_wait_list,
                       cl_event *event,
                       cl_int cmd) try {

   if (bool(num_svm_pointers) != bool(svm_pointers))
      return CL_INVALID_VALUE;

   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   std::vector<void *> svm_pointers_cpy(svm_pointers,
                                        svm_pointers + num_svm_pointers);
   if (!pfn_free_func) {
      if (!can_emulate) {
         CLOVER_NOT_SUPPORTED_UNTIL("2.0");
         return CL_INVALID_VALUE;
      }
      pfn_free_func = [](cl_command_queue d_q, cl_uint num_svm_pointers,
                         void *svm_pointers[], void *) {
         clover::context &ctx = obj(d_q).context();
         for (void *p : range(svm_pointers, num_svm_pointers)) {
            ctx.remove_svm_allocation(p);
            free(p);
         }
      };
   }

   auto hev = create<hard_event>(q, cmd, deps,
      [=](clover::event &) mutable {
         pfn_free_func(d_q, num_svm_pointers, svm_pointers_cpy.data(),
                       user_data);
      });

   ret_object(event, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
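
//
// Illustration (hypothetical usage, not part of the original file):
// with a null pfn_free_func and a system-SVM device, the default
// callback installed above removes each pointer from the context's
// SVM allocations and free()s it when the event runs.
//
//    void *ptrs[] = { p0, p1 };
//    clEnqueueSVMFree(queue, 2, ptrs, NULL, NULL, 0, NULL, NULL);
//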

CLOVER_API cl_int
clEnqueueSVMFree(cl_command_queue d_q,
                 cl_uint num_svm_pointers,
                 void *svm_pointers[],
                 void (CL_CALLBACK *pfn_free_func) (
                    cl_command_queue queue, cl_uint num_svm_pointers,
                    void *svm_pointers[], void *user_data),
                 void *user_data,
                 cl_uint num_events_in_wait_list,
                 const cl_event *event_wait_list,
                 cl_event *event) {

   return EnqueueSVMFree(d_q, num_svm_pointers, svm_pointers,
                         pfn_free_func, user_data, num_events_in_wait_list,
                         event_wait_list, event, CL_COMMAND_SVM_FREE);
}

cl_int
clover::EnqueueSVMMemcpy(cl_command_queue d_q,
                         cl_bool blocking_copy,
                         void *dst_ptr,
                         const void *src_ptr,
                         size_t size,
                         cl_uint num_events_in_wait_list,
                         const cl_event *event_wait_list,
                         cl_event *event,
                         cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (dst_ptr == nullptr || src_ptr == nullptr)
      return CL_INVALID_VALUE;

   if (static_cast<size_t>(abs(reinterpret_cast<ptrdiff_t>(dst_ptr) -
                               reinterpret_cast<ptrdiff_t>(src_ptr))) < size)
      return CL_MEM_COPY_OVERLAP;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            memcpy(dst_ptr, src_ptr, size);
         });

      if (blocking_copy)
         hev().wait();
      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMemcpy(cl_command_queue d_q,
                   cl_bool blocking_copy,
                   void *dst_ptr,
                   const void *src_ptr,
                   size_t size,
                   cl_uint num_events_in_wait_list,
                   const cl_event *event_wait_list,
                   cl_event *event) {

   return EnqueueSVMMemcpy(d_q, blocking_copy, dst_ptr, src_ptr,
                           size, num_events_in_wait_list, event_wait_list,
                           event, CL_COMMAND_SVM_MEMCPY);
}

cl_int
clover::EnqueueSVMMemFill(cl_command_queue d_q,
                          void *svm_ptr,
                          const void *pattern,
                          size_t pattern_size,
                          size_t size,
                          cl_uint num_events_in_wait_list,
                          const cl_event *event_wait_list,
                          cl_event *event,
                          cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || pattern == nullptr ||
       !util_is_power_of_two_nonzero(pattern_size) ||
       pattern_size > 128 ||
       !ptr_is_aligned(svm_ptr, pattern_size) ||
       size % pattern_size)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [=](clover::event &) {
            void *ptr = svm_ptr;
            for (size_t s = size; s; s -= pattern_size) {
               memcpy(ptr, pattern, pattern_size);
               ptr = static_cast<uint8_t*>(ptr) + pattern_size;
            }
         });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}
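
//
// Illustration (hypothetical values, not part of the original file):
// the emulated fill above writes the pattern back to back, so with
// pattern_size == 4 and size == 16 the loop performs four 4-byte
// memcpy calls at offsets 0, 4, 8 and 12.  An svm_ptr that is not
// 4-byte aligned, or a size of 15, is rejected up front with
// CL_INVALID_VALUE.
//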

CLOVER_API cl_int
clEnqueueSVMMemFill(cl_command_queue d_q,
                    void *svm_ptr,
                    const void *pattern,
                    size_t pattern_size,
                    size_t size,
                    cl_uint num_events_in_wait_list,
                    const cl_event *event_wait_list,
                    cl_event *event) {

   return EnqueueSVMMemFill(d_q, svm_ptr, pattern, pattern_size,
                            size, num_events_in_wait_list, event_wait_list,
                            event, CL_COMMAND_SVM_MEMFILL);
}

cl_int
clover::EnqueueSVMMap(cl_command_queue d_q,
                      cl_bool blocking_map,
                      cl_map_flags map_flags,
                      void *svm_ptr,
                      size_t size,
                      cl_uint num_events_in_wait_list,
                      const cl_event *event_wait_list,
                      cl_event *event,
                      cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr || size == 0)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMMap(cl_command_queue d_q,
                cl_bool blocking_map,
                cl_map_flags map_flags,
                void *svm_ptr,
                size_t size,
                cl_uint num_events_in_wait_list,
                const cl_event *event_wait_list,
                cl_event *event) {

   return EnqueueSVMMap(d_q, blocking_map, map_flags, svm_ptr, size,
                        num_events_in_wait_list, event_wait_list, event,
                        CL_COMMAND_SVM_MAP);
}

cl_int
clover::EnqueueSVMUnmap(cl_command_queue d_q,
                        void *svm_ptr,
                        cl_uint num_events_in_wait_list,
                        const cl_event *event_wait_list,
                        cl_event *event,
                        cl_int cmd) try {
   auto &q = obj(d_q);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (svm_ptr == nullptr)
      return CL_INVALID_VALUE;

   bool can_emulate = q.device().has_system_svm();
   auto deps = objs<wait_list_tag>(event_wait_list, num_events_in_wait_list);

   validate_common(q, deps);

   if (can_emulate) {
      auto hev = create<hard_event>(q, cmd, deps,
         [](clover::event &) { });

      ret_object(event, hev);
      return CL_SUCCESS;
   }

   CLOVER_NOT_SUPPORTED_UNTIL("2.0");
   return CL_INVALID_VALUE;

} catch (error &e) {
   return e.get();
}

CLOVER_API cl_int
clEnqueueSVMUnmap(cl_command_queue d_q,
                  void *svm_ptr,
                  cl_uint num_events_in_wait_list,
                  const cl_event *event_wait_list,
                  cl_event *event) {

   return EnqueueSVMUnmap(d_q, svm_ptr, num_events_in_wait_list,
                          event_wait_list, event, CL_COMMAND_SVM_UNMAP);
}
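
//
// Illustration (hypothetical usage, not part of the original file):
// under system SVM the map and unmap entry points above only need to
// order execution, so they enqueue events with empty actions; the
// pointer itself is already directly accessible to the host.
//
//    clEnqueueSVMMap(queue, CL_TRUE, CL_MAP_WRITE, p, 64, 0, NULL, NULL);
//    memset(p, 0, 64);
//    clEnqueueSVMUnmap(queue, p, 0, NULL, NULL);
//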

CLOVER_API cl_int
clEnqueueSVMMigrateMem(cl_command_queue d_q,
                       cl_uint num_svm_pointers,
                       const void **svm_pointers,
                       const size_t *sizes,
                       const cl_mem_migration_flags flags,
                       cl_uint num_deps,
                       const cl_event *d_deps,
                       cl_event *rd_ev) try {
   auto &q = obj(d_q);
   auto deps = objs<wait_list_tag>(d_deps, num_deps);

   validate_common(q, deps);
   validate_mem_migration_flags(flags);

   if (!q.device().svm_support())
      return CL_INVALID_OPERATION;

   if (!num_svm_pointers || !svm_pointers)
      return CL_INVALID_VALUE;

   std::vector<size_t> sizes_copy(num_svm_pointers);
   std::vector<const void*> ptrs(num_svm_pointers);

   for (unsigned i = 0; i < num_svm_pointers; ++i) {
      const void *ptr = svm_pointers[i];
      size_t size = sizes ? sizes[i] : 0;
      if (!ptr)
         return CL_INVALID_VALUE;

      auto p = q.context().find_svm_allocation(ptr);
      if (!p.first)
         return CL_INVALID_VALUE;

      std::ptrdiff_t pdiff = (uint8_t*)ptr - (uint8_t*)p.first;
      if (size && size + pdiff > p.second)
         return CL_INVALID_VALUE;

      sizes_copy[i] = size ? size : p.second;
      ptrs[i] = size ? svm_pointers[i] : p.first;
   }

   auto hev = create<hard_event>(
      q, CL_COMMAND_MIGRATE_MEM_OBJECTS, deps,
      [=, &q](event &) {
         q.svm_migrate(ptrs, sizes_copy, flags);
      });

   ret_object(rd_ev, hev);
   return CL_SUCCESS;

} catch (error &e) {
   return e.get();
}
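
//
// Illustration (hypothetical values, not part of the original file):
// the bounds check in clEnqueueSVMMigrateMem above accepts a pointer
// into the middle of a known SVM allocation as long as the requested
// range stays inside it.  For an allocation of 4096 bytes starting at
// base, ptr == base + 1024 with size == 3072 is valid, while
// size == 3584 fails (1024 + 3584 > 4096); size == 0 migrates the
// whole allocation starting at base.
//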