1// 2// Copyright 2012 Francisco Jerez 3// 4// Permission is hereby granted, free of charge, to any person obtaining a 5// copy of this software and associated documentation files (the "Software"), 6// to deal in the Software without restriction, including without limitation 7// the rights to use, copy, modify, merge, publish, distribute, sublicense, 8// and/or sell copies of the Software, and to permit persons to whom the 9// Software is furnished to do so, subject to the following conditions: 10// 11// The above copyright notice and this permission notice shall be included in 12// all copies or substantial portions of the Software. 13// 14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17// THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 18// OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 20// OTHER DEALINGS IN THE SOFTWARE. 21// 22 23#include "util/format/u_format.h" 24#include "util/u_math.h" 25#include "api/util.hpp" 26#include "core/memory.hpp" 27#include "core/format.hpp" 28 29using namespace clover; 30 31namespace { 32 cl_mem_flags 33 validate_flags(cl_mem d_parent, cl_mem_flags d_flags, bool svm) { 34 const cl_mem_flags dev_access_flags = 35 CL_MEM_READ_WRITE | CL_MEM_WRITE_ONLY | CL_MEM_READ_ONLY; 36 const cl_mem_flags host_ptr_flags = 37 CL_MEM_USE_HOST_PTR | CL_MEM_ALLOC_HOST_PTR | CL_MEM_COPY_HOST_PTR; 38 const cl_mem_flags host_access_flags = 39 CL_MEM_HOST_WRITE_ONLY | CL_MEM_HOST_READ_ONLY | CL_MEM_HOST_NO_ACCESS; 40 const cl_mem_flags svm_flags = 41 CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS; 42 43 const cl_mem_flags valid_flags = 44 dev_access_flags 45 | (svm || d_parent ? 0 : host_ptr_flags) 46 | (svm ? svm_flags : host_access_flags); 47 48 if ((d_flags & ~valid_flags) || 49 util_bitcount(d_flags & dev_access_flags) > 1 || 50 util_bitcount(d_flags & host_access_flags) > 1) 51 throw error(CL_INVALID_VALUE); 52 53 if ((d_flags & CL_MEM_USE_HOST_PTR) && 54 (d_flags & (CL_MEM_COPY_HOST_PTR | CL_MEM_ALLOC_HOST_PTR))) 55 throw error(CL_INVALID_VALUE); 56 57 if ((d_flags & CL_MEM_SVM_ATOMICS) && 58 !(d_flags & CL_MEM_SVM_FINE_GRAIN_BUFFER)) 59 throw error(CL_INVALID_VALUE); 60 61 if (d_parent) { 62 const auto &parent = obj(d_parent); 63 const cl_mem_flags flags = (d_flags | 64 (d_flags & dev_access_flags ? 0 : 65 parent.flags() & dev_access_flags) | 66 (d_flags & host_access_flags ? 0 : 67 parent.flags() & host_access_flags) | 68 (parent.flags() & host_ptr_flags)); 69 70 if (~flags & parent.flags() & (dev_access_flags & ~CL_MEM_READ_WRITE)) 71 throw error(CL_INVALID_VALUE); 72 73 // Check if new host access flags cause a mismatch between 74 // host-read/write-only. 75 if (!(flags & CL_MEM_HOST_NO_ACCESS) && 76 (~flags & parent.flags() & host_access_flags)) 77 throw error(CL_INVALID_VALUE); 78 79 return flags; 80 81 } else { 82 return d_flags | (d_flags & dev_access_flags ? 0 : CL_MEM_READ_WRITE); 83 } 84 } 85 86 std::vector<cl_mem_properties> 87 fill_properties(const cl_mem_properties *d_properties) { 88 std::vector<cl_mem_properties> properties; 89 if (d_properties) { 90 while (*d_properties) { 91 if (*d_properties != 0) 92 throw error(CL_INVALID_PROPERTY); 93 94 properties.push_back(*d_properties); 95 d_properties++; 96 }; 97 properties.push_back(0); 98 } 99 return properties; 100 } 101} 102 103CLOVER_API cl_mem 104clCreateBufferWithProperties(cl_context d_ctx, 105 const cl_mem_properties *d_properties, 106 cl_mem_flags d_flags, size_t size, 107 void *host_ptr, cl_int *r_errcode) try { 108 109 auto &ctx = obj(d_ctx); 110 const cl_mem_flags flags = validate_flags(NULL, d_flags, false); 111 std::vector<cl_mem_properties> properties = fill_properties(d_properties); 112 113 if (bool(host_ptr) != bool(flags & (CL_MEM_USE_HOST_PTR | 114 CL_MEM_COPY_HOST_PTR))) 115 throw error(CL_INVALID_HOST_PTR); 116 117 if (!size || 118 size > fold(maximum(), cl_ulong(0), 119 map(std::mem_fn(&device::max_mem_alloc_size), ctx.devices()) 120 )) 121 throw error(CL_INVALID_BUFFER_SIZE); 122 123 ret_error(r_errcode, CL_SUCCESS); 124 return new root_buffer(ctx, properties, flags, size, host_ptr); 125} catch (error &e) { 126 ret_error(r_errcode, e); 127 return NULL; 128} 129 130 131CLOVER_API cl_mem 132clCreateBuffer(cl_context d_ctx, cl_mem_flags d_flags, size_t size, 133 void *host_ptr, cl_int *r_errcode) { 134 return clCreateBufferWithProperties(d_ctx, NULL, d_flags, size, 135 host_ptr, r_errcode); 136} 137 138CLOVER_API cl_mem 139clCreateSubBuffer(cl_mem d_mem, cl_mem_flags d_flags, 140 cl_buffer_create_type op, 141 const void *op_info, cl_int *r_errcode) try { 142 auto &parent = obj<root_buffer>(d_mem); 143 const cl_mem_flags flags = validate_flags(d_mem, d_flags, false); 144 145 if (op == CL_BUFFER_CREATE_TYPE_REGION) { 146 auto reg = reinterpret_cast<const cl_buffer_region *>(op_info); 147 148 if (!reg || 149 reg->origin > parent.size() || 150 reg->origin + reg->size > parent.size()) 151 throw error(CL_INVALID_VALUE); 152 153 if (!reg->size) 154 throw error(CL_INVALID_BUFFER_SIZE); 155 156 ret_error(r_errcode, CL_SUCCESS); 157 return new sub_buffer(parent, flags, reg->origin, reg->size); 158 159 } else { 160 throw error(CL_INVALID_VALUE); 161 } 162 163} catch (error &e) { 164 ret_error(r_errcode, e); 165 return NULL; 166} 167 168CLOVER_API cl_mem 169clCreateImageWithProperties(cl_context d_ctx, 170 const cl_mem_properties *d_properties, 171 cl_mem_flags d_flags, 172 const cl_image_format *format, 173 const cl_image_desc *desc, 174 void *host_ptr, cl_int *r_errcode) try { 175 auto &ctx = obj(d_ctx); 176 177 if (!any_of(std::mem_fn(&device::image_support), ctx.devices())) 178 throw error(CL_INVALID_OPERATION); 179 180 if (!format) 181 throw error(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); 182 183 if (!desc) 184 throw error(CL_INVALID_IMAGE_DESCRIPTOR); 185 186 if (desc->image_array_size == 0 && 187 (desc->image_type == CL_MEM_OBJECT_IMAGE1D_ARRAY || 188 desc->image_type == CL_MEM_OBJECT_IMAGE2D_ARRAY)) 189 throw error(CL_INVALID_IMAGE_DESCRIPTOR); 190 191 if (!host_ptr && 192 (desc->image_row_pitch || desc->image_slice_pitch)) 193 throw error(CL_INVALID_IMAGE_DESCRIPTOR); 194 195 if (desc->num_mip_levels || desc->num_samples) 196 throw error(CL_INVALID_IMAGE_DESCRIPTOR); 197 198 if (bool(desc->buffer) != (desc->image_type == CL_MEM_OBJECT_IMAGE1D_BUFFER)) 199 throw error(CL_INVALID_IMAGE_DESCRIPTOR); 200 201 if (bool(host_ptr) != bool(d_flags & (CL_MEM_USE_HOST_PTR | 202 CL_MEM_COPY_HOST_PTR))) 203 throw error(CL_INVALID_HOST_PTR); 204 205 const cl_mem_flags flags = validate_flags(desc->buffer, d_flags, false); 206 207 if (!supported_formats(ctx, desc->image_type, d_flags).count(*format)) 208 throw error(CL_IMAGE_FORMAT_NOT_SUPPORTED); 209 210 std::vector<cl_mem_properties> properties = fill_properties(d_properties); 211 ret_error(r_errcode, CL_SUCCESS); 212 213 const size_t row_pitch = desc->image_row_pitch ? desc->image_row_pitch : 214 util_format_get_blocksize(translate_format(*format)) * desc->image_width; 215 216 switch (desc->image_type) { 217 case CL_MEM_OBJECT_IMAGE1D: 218 if (!desc->image_width) 219 throw error(CL_INVALID_IMAGE_SIZE); 220 221 if (all_of([=](const device &dev) { 222 const size_t max = dev.max_image_size(); 223 return (desc->image_width > max); 224 }, ctx.devices())) 225 throw error(CL_INVALID_IMAGE_SIZE); 226 227 return new image1d(ctx, properties, flags, format, 228 desc->image_width, 229 row_pitch, host_ptr); 230 231 case CL_MEM_OBJECT_IMAGE1D_BUFFER: 232 if (!desc->image_width) 233 throw error(CL_INVALID_IMAGE_SIZE); 234 235 if (all_of([=](const device &dev) { 236 const size_t max = dev.max_image_buffer_size(); 237 return (desc->image_width > max); 238 }, ctx.devices())) 239 throw error(CL_INVALID_IMAGE_SIZE); 240 241 return new image1d_buffer(ctx, properties, flags, format, 242 desc->image_width, 243 row_pitch, host_ptr, desc->buffer); 244 245 case CL_MEM_OBJECT_IMAGE1D_ARRAY: { 246 if (!desc->image_width) 247 throw error(CL_INVALID_IMAGE_SIZE); 248 249 if (all_of([=](const device &dev) { 250 const size_t max = dev.max_image_size(); 251 const size_t amax = dev.max_image_array_number(); 252 return (desc->image_width > max || 253 desc->image_array_size > amax); 254 }, ctx.devices())) 255 throw error(CL_INVALID_IMAGE_SIZE); 256 257 const size_t slice_pitch = desc->image_slice_pitch ? 258 desc->image_slice_pitch : row_pitch; 259 260 return new image1d_array(ctx, properties, flags, format, 261 desc->image_width, 262 desc->image_array_size, slice_pitch, 263 host_ptr); 264 } 265 266 case CL_MEM_OBJECT_IMAGE2D: 267 if (!desc->image_width || !desc->image_height) 268 throw error(CL_INVALID_IMAGE_SIZE); 269 270 if (all_of([=](const device &dev) { 271 const size_t max = dev.max_image_size(); 272 return (desc->image_width > max || 273 desc->image_height > max); 274 }, ctx.devices())) 275 throw error(CL_INVALID_IMAGE_SIZE); 276 277 return new image2d(ctx, properties, flags, format, 278 desc->image_width, desc->image_height, 279 row_pitch, host_ptr); 280 281 case CL_MEM_OBJECT_IMAGE2D_ARRAY: { 282 if (!desc->image_width || !desc->image_height || !desc->image_array_size) 283 throw error(CL_INVALID_IMAGE_SIZE); 284 285 if (all_of([=](const device &dev) { 286 const size_t max = dev.max_image_size(); 287 const size_t amax = dev.max_image_array_number(); 288 return (desc->image_width > max || 289 desc->image_height > max || 290 desc->image_array_size > amax); 291 }, ctx.devices())) 292 throw error(CL_INVALID_IMAGE_SIZE); 293 294 const size_t slice_pitch = desc->image_slice_pitch ? 295 desc->image_slice_pitch : row_pitch * desc->image_height; 296 297 return new image2d_array(ctx, properties, flags, format, 298 desc->image_width, desc->image_height, 299 desc->image_array_size, row_pitch, 300 slice_pitch, host_ptr); 301 } 302 303 case CL_MEM_OBJECT_IMAGE3D: { 304 if (!desc->image_width || !desc->image_height || !desc->image_depth) 305 throw error(CL_INVALID_IMAGE_SIZE); 306 307 if (all_of([=](const device &dev) { 308 const size_t max = dev.max_image_size_3d(); 309 return (desc->image_width > max || 310 desc->image_height > max || 311 desc->image_depth > max); 312 }, ctx.devices())) 313 throw error(CL_INVALID_IMAGE_SIZE); 314 315 const size_t slice_pitch = desc->image_slice_pitch ? 316 desc->image_slice_pitch : row_pitch * desc->image_height; 317 318 return new image3d(ctx, properties, flags, format, 319 desc->image_width, desc->image_height, 320 desc->image_depth, row_pitch, 321 slice_pitch, host_ptr); 322 } 323 324 default: 325 throw error(CL_INVALID_IMAGE_DESCRIPTOR); 326 } 327 328} catch (error &e) { 329 ret_error(r_errcode, e); 330 return NULL; 331} 332 333CLOVER_API cl_mem 334clCreateImage(cl_context d_ctx, 335 cl_mem_flags d_flags, 336 const cl_image_format *format, 337 const cl_image_desc *desc, 338 void *host_ptr, cl_int *r_errcode) { 339 return clCreateImageWithProperties(d_ctx, NULL, d_flags, format, desc, host_ptr, r_errcode); 340} 341 342 343CLOVER_API cl_mem 344clCreateImage2D(cl_context d_ctx, cl_mem_flags d_flags, 345 const cl_image_format *format, 346 size_t width, size_t height, size_t row_pitch, 347 void *host_ptr, cl_int *r_errcode) { 348 const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE2D, width, height, 0, 0, 349 row_pitch, 0, 0, 0, NULL }; 350 351 return clCreateImageWithProperties(d_ctx, NULL, d_flags, format, &desc, host_ptr, r_errcode); 352} 353 354CLOVER_API cl_mem 355clCreateImage3D(cl_context d_ctx, cl_mem_flags d_flags, 356 const cl_image_format *format, 357 size_t width, size_t height, size_t depth, 358 size_t row_pitch, size_t slice_pitch, 359 void *host_ptr, cl_int *r_errcode) { 360 const cl_image_desc desc = { CL_MEM_OBJECT_IMAGE3D, width, height, depth, 0, 361 row_pitch, slice_pitch, 0, 0, NULL }; 362 363 return clCreateImageWithProperties(d_ctx, NULL, d_flags, format, &desc, host_ptr, r_errcode); 364} 365 366CLOVER_API cl_int 367clGetSupportedImageFormats(cl_context d_ctx, cl_mem_flags flags, 368 cl_mem_object_type type, cl_uint count, 369 cl_image_format *r_buf, cl_uint *r_count) try { 370 auto &ctx = obj(d_ctx); 371 auto formats = supported_formats(ctx, type, flags); 372 373 if (flags & CL_MEM_KERNEL_READ_AND_WRITE) { 374 if (r_count) 375 *r_count = 0; 376 return CL_SUCCESS; 377 } 378 379 if (flags & (CL_MEM_WRITE_ONLY | CL_MEM_READ_WRITE) && 380 type == CL_MEM_OBJECT_IMAGE3D) { 381 if (r_count) 382 *r_count = 0; 383 return CL_SUCCESS; 384 } 385 386 validate_flags(NULL, flags, false); 387 388 if (r_buf && !count) 389 throw error(CL_INVALID_VALUE); 390 391 if (r_buf) 392 std::copy_n(formats.begin(), 393 std::min((cl_uint)formats.size(), count), 394 r_buf); 395 396 if (r_count) 397 *r_count = formats.size(); 398 399 return CL_SUCCESS; 400 401} catch (error &e) { 402 return e.get(); 403} 404 405CLOVER_API cl_int 406clGetMemObjectInfo(cl_mem d_mem, cl_mem_info param, 407 size_t size, void *r_buf, size_t *r_size) try { 408 property_buffer buf { r_buf, size, r_size }; 409 auto &mem = obj(d_mem); 410 411 switch (param) { 412 case CL_MEM_TYPE: 413 buf.as_scalar<cl_mem_object_type>() = mem.type(); 414 break; 415 416 case CL_MEM_FLAGS: 417 buf.as_scalar<cl_mem_flags>() = mem.flags(); 418 break; 419 420 case CL_MEM_SIZE: 421 buf.as_scalar<size_t>() = mem.size(); 422 break; 423 424 case CL_MEM_HOST_PTR: 425 buf.as_scalar<void *>() = mem.host_ptr(); 426 break; 427 428 case CL_MEM_MAP_COUNT: 429 buf.as_scalar<cl_uint>() = 0; 430 break; 431 432 case CL_MEM_REFERENCE_COUNT: 433 buf.as_scalar<cl_uint>() = mem.ref_count(); 434 break; 435 436 case CL_MEM_CONTEXT: 437 buf.as_scalar<cl_context>() = desc(mem.context()); 438 break; 439 440 case CL_MEM_ASSOCIATED_MEMOBJECT: { 441 sub_buffer *sub = dynamic_cast<sub_buffer *>(&mem); 442 if (sub) { 443 buf.as_scalar<cl_mem>() = desc(sub->parent()); 444 break; 445 } 446 447 image *img = dynamic_cast<image *>(&mem); 448 if (img) { 449 buf.as_scalar<cl_mem>() = desc(img->buffer()); 450 break; 451 } 452 453 buf.as_scalar<cl_mem>() = NULL; 454 break; 455 } 456 case CL_MEM_OFFSET: { 457 sub_buffer *sub = dynamic_cast<sub_buffer *>(&mem); 458 buf.as_scalar<size_t>() = (sub ? sub->offset() : 0); 459 break; 460 } 461 case CL_MEM_USES_SVM_POINTER: 462 case CL_MEM_USES_SVM_POINTER_ARM: { 463 // with system SVM all host ptrs are SVM pointers 464 // TODO: once we support devices with lower levels of SVM, we have to 465 // check the ptr in more detail 466 const bool system_svm = all_of(std::mem_fn(&device::has_system_svm), 467 mem.context().devices()); 468 buf.as_scalar<cl_bool>() = mem.host_ptr() && system_svm; 469 break; 470 } 471 case CL_MEM_PROPERTIES: 472 buf.as_vector<cl_mem_properties>() = mem.properties(); 473 break; 474 default: 475 throw error(CL_INVALID_VALUE); 476 } 477 478 return CL_SUCCESS; 479 480} catch (error &e) { 481 return e.get(); 482} 483 484CLOVER_API cl_int 485clGetImageInfo(cl_mem d_mem, cl_image_info param, 486 size_t size, void *r_buf, size_t *r_size) try { 487 property_buffer buf { r_buf, size, r_size }; 488 auto &img = obj<image>(d_mem); 489 490 switch (param) { 491 case CL_IMAGE_FORMAT: 492 buf.as_scalar<cl_image_format>() = img.format(); 493 break; 494 495 case CL_IMAGE_ELEMENT_SIZE: 496 buf.as_scalar<size_t>() = img.pixel_size(); 497 break; 498 499 case CL_IMAGE_ROW_PITCH: 500 buf.as_scalar<size_t>() = img.row_pitch(); 501 break; 502 503 case CL_IMAGE_SLICE_PITCH: 504 buf.as_scalar<size_t>() = img.slice_pitch(); 505 break; 506 507 case CL_IMAGE_WIDTH: 508 buf.as_scalar<size_t>() = img.width(); 509 break; 510 511 case CL_IMAGE_HEIGHT: 512 buf.as_scalar<size_t>() = img.dimensions() > 1 ? img.height() : 0; 513 break; 514 515 case CL_IMAGE_DEPTH: 516 buf.as_scalar<size_t>() = img.dimensions() > 2 ? img.depth() : 0; 517 break; 518 519 case CL_IMAGE_ARRAY_SIZE: 520 buf.as_scalar<size_t>() = img.array_size(); 521 break; 522 523 case CL_IMAGE_BUFFER: 524 buf.as_scalar<cl_mem>() = img.buffer(); 525 break; 526 527 case CL_IMAGE_NUM_MIP_LEVELS: 528 buf.as_scalar<cl_uint>() = 0; 529 break; 530 531 case CL_IMAGE_NUM_SAMPLES: 532 buf.as_scalar<cl_uint>() = 0; 533 break; 534 535 default: 536 throw error(CL_INVALID_VALUE); 537 } 538 539 return CL_SUCCESS; 540 541} catch (error &e) { 542 return e.get(); 543} 544 545CLOVER_API cl_int 546clRetainMemObject(cl_mem d_mem) try { 547 obj(d_mem).retain(); 548 return CL_SUCCESS; 549 550} catch (error &e) { 551 return e.get(); 552} 553 554CLOVER_API cl_int 555clReleaseMemObject(cl_mem d_mem) try { 556 if (obj(d_mem).release()) 557 delete pobj(d_mem); 558 559 return CL_SUCCESS; 560 561} catch (error &e) { 562 return e.get(); 563} 564 565CLOVER_API cl_int 566clSetMemObjectDestructorCallback(cl_mem d_mem, 567 void (CL_CALLBACK *pfn_notify)(cl_mem, void *), 568 void *user_data) try { 569 auto &mem = obj(d_mem); 570 571 if (!pfn_notify) 572 return CL_INVALID_VALUE; 573 574 mem.destroy_notify([=]{ pfn_notify(d_mem, user_data); }); 575 576 return CL_SUCCESS; 577 578} catch (error &e) { 579 return e.get(); 580} 581 582CLOVER_API void * 583clSVMAlloc(cl_context d_ctx, 584 cl_svm_mem_flags flags, 585 size_t size, 586 unsigned int alignment) try { 587 auto &ctx = obj(d_ctx); 588 589 if (!any_of(std::mem_fn(&device::svm_support), ctx.devices())) 590 return NULL; 591 592 validate_flags(NULL, flags, true); 593 594 if (!size || 595 size > fold(minimum(), cl_ulong(ULONG_MAX), 596 map(std::mem_fn(&device::max_mem_alloc_size), ctx.devices()))) 597 return nullptr; 598 599 if (!util_is_power_of_two_or_zero(alignment)) 600 return nullptr; 601 602 if (!alignment) 603 alignment = 0x80; // sizeof(long16) 604 605#if HAVE_POSIX_MEMALIGN 606 bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices()); 607 if (can_emulate) { 608 // we can ignore all the flags as it's not required to honor them. 609 void *ptr = nullptr; 610 if (alignment < sizeof(void*)) 611 alignment = sizeof(void*); 612 posix_memalign(&ptr, alignment, size); 613 614 if (ptr) 615 ctx.add_svm_allocation(ptr, size); 616 617 return ptr; 618 } 619#endif 620 621 CLOVER_NOT_SUPPORTED_UNTIL("2.0"); 622 return nullptr; 623 624} catch (error &) { 625 return nullptr; 626} 627 628CLOVER_API void 629clSVMFree(cl_context d_ctx, 630 void *svm_pointer) try { 631 auto &ctx = obj(d_ctx); 632 633 if (!any_of(std::mem_fn(&device::svm_support), ctx.devices())) 634 return; 635 636 bool can_emulate = all_of(std::mem_fn(&device::has_system_svm), ctx.devices()); 637 638 if (can_emulate) { 639 ctx.remove_svm_allocation(svm_pointer); 640 return free(svm_pointer); 641 } 642 643 CLOVER_NOT_SUPPORTED_UNTIL("2.0"); 644 645} catch (error &) { 646} 647