/*
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "avassert.h"

#include "vulkan.h"
#include "vulkan_loader.h"

#if CONFIG_LIBGLSLANG
#include "vulkan_glslang.c"
#elif CONFIG_LIBSHADERC
#include "vulkan_shaderc.c"
#endif

/* Generic macro for creating contexts which need to keep their addresses
 * if another context is created. */
#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
{                                                                              \
    type **array, *sctx = av_mallocz(sizeof(*sctx));                           \
    if (!sctx)                                                                 \
        return NULL;                                                           \
                                                                               \
    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
    if (!array) {                                                              \
        av_free(sctx);                                                         \
        return NULL;                                                           \
    }                                                                          \
                                                                               \
    dctx->array = array;                                                       \
    dctx->array[dctx->num++] = sctx;                                           \
                                                                               \
    return sctx;                                                               \
}

const VkComponentMapping ff_comp_identity_map = {
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
};

/* Converts return values to strings */
const char *ff_vk_ret2str(VkResult res)
{
#define CASE(VAL) case VAL: return #VAL
    switch (res) {
    CASE(VK_SUCCESS);
    CASE(VK_NOT_READY);
    CASE(VK_TIMEOUT);
    CASE(VK_EVENT_SET);
    CASE(VK_EVENT_RESET);
    CASE(VK_INCOMPLETE);
    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
    CASE(VK_ERROR_INITIALIZATION_FAILED);
    CASE(VK_ERROR_DEVICE_LOST);
    CASE(VK_ERROR_MEMORY_MAP_FAILED);
    CASE(VK_ERROR_LAYER_NOT_PRESENT);
    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
    CASE(VK_ERROR_TOO_MANY_OBJECTS);
    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
    CASE(VK_ERROR_FRAGMENTED_POOL);
    CASE(VK_ERROR_SURFACE_LOST_KHR);
    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
    CASE(VK_SUBOPTIMAL_KHR);
    CASE(VK_ERROR_OUT_OF_DATE_KHR);
    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
    CASE(VK_ERROR_INVALID_SHADER_NV);
    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
    CASE(VK_ERROR_NOT_PERMITTED_EXT);
    default: return "Unknown error";
    }
#undef CASE
}

void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
                   VkQueueFlagBits dev_family, int nb_queues)
{
    switch (dev_family) {
    case VK_QUEUE_GRAPHICS_BIT:
        qf->queue_family = s->hwctx->queue_family_index;
        qf->actual_queues = s->hwctx->nb_graphics_queues;
        break;
    case VK_QUEUE_COMPUTE_BIT:
        qf->queue_family = s->hwctx->queue_family_comp_index;
        qf->actual_queues = s->hwctx->nb_comp_queues;
        break;
    case VK_QUEUE_TRANSFER_BIT:
        qf->queue_family = s->hwctx->queue_family_tx_index;
        qf->actual_queues = s->hwctx->nb_tx_queues;
        break;
    case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
        qf->queue_family = s->hwctx->queue_family_encode_index;
        qf->actual_queues = s->hwctx->nb_encode_queues;
        break;
    case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
        qf->queue_family = s->hwctx->queue_family_decode_index;
        qf->actual_queues = s->hwctx->nb_decode_queues;
        break;
    default:
        av_assert0(0); /* Should never happen */
    }

    if (!nb_queues)
        qf->nb_queues = qf->actual_queues;
    else
        qf->nb_queues = nb_queues;
}

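/* Usage sketch (illustrative only; the API is declared in vulkan.h): a user
 * typically picks a queue family once at init time and then cycles through
 * its queues, one per submission:
 *
 *     FFVkQueueFamilyCtx qf;
 *     ff_vk_qf_init(s, &qf, VK_QUEUE_COMPUTE_BIT, 0); // 0: use every queue
 *     // ... after each ff_vk_submit_exec_queue():
 *     ff_vk_qf_rotate(&qf);
 */
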
void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
{
    qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
}

static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
                        VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
                        VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
{
    VkResult ret;
    int index = -1;
    FFVulkanFunctions *vk = &s->vkfn;

    VkMemoryAllocateInfo alloc_info = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
        .pNext = alloc_extension,
    };

    /* Align if we need to */
    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
        req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);

    alloc_info.allocationSize = req->size;

    /* The vulkan spec requires memory types to be sorted in the "optimal"
     * order, so the first matching type we find will be the best/fastest one */
    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
        /* The memory type must be supported by the requirements (bitfield) */
        if (!(req->memoryTypeBits & (1 << i)))
            continue;

        /* The memory type flags must include our properties */
        if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
            continue;

        /* Found a suitable memory type */
        index = i;
        break;
    }

    if (index < 0) {
        av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
               req_flags);
        return AVERROR(EINVAL);
    }

    alloc_info.memoryTypeIndex = index;

    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
                             s->hwctx->alloc, mem);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR(ENOMEM);
    }

    *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;

    return 0;
}

int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
{
    int err;
    VkResult ret;
    int use_ded_mem;
    FFVulkanFunctions *vk = &s->vkfn;

    VkBufferCreateInfo buf_spawn = {
        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
        .pNext       = NULL,
        .usage       = usage,
        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
        .size        = size, /* Gets aligned up during allocation if
                              * host-visible, which is harmless */
    };

    VkBufferMemoryRequirementsInfo2 req_desc = {
        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
    };
    VkMemoryDedicatedAllocateInfo ded_alloc = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
        .pNext = NULL,
    };
    VkMemoryDedicatedRequirements ded_req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
    };
    VkMemoryRequirements2 req = {
        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
        .pNext = &ded_req,
    };

    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    req_desc.buffer = buf->buf;

    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);

    /* In case the implementation prefers/requires dedicated allocation */
    use_ded_mem = ded_req.prefersDedicatedAllocation |
                  ded_req.requiresDedicatedAllocation;
    if (use_ded_mem)
        ded_alloc.buffer = buf->buf;

    err = vk_alloc_mem(s, &req.memoryRequirements, flags,
                       use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
                       &buf->flags, &buf->mem);
    if (err)
        return err;

    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

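/* A minimal usage sketch: creating a host-visible staging buffer for
 * uploads. The flags shown are just one sensible combination:
 *
 *     FFVkBuffer buf = { 0 };
 *     int err = ff_vk_create_buf(s, &buf, buffer_size,
 *                                VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
 *                                VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
 *     if (err < 0)
 *         return err;
 */
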
int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
                      int nb_buffers, int invalidate)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange *inval_list = NULL;
    int inval_count = 0;

    for (int i = 0; i < nb_buffers; i++) {
        ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
                            VK_WHOLE_SIZE, 0, (void **)&mem[i]);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    if (!invalidate)
        return 0;

    for (int i = 0; i < nb_buffers; i++) {
        const VkMappedMemoryRange ival_buf = {
            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
            .memory = buf[i].mem,
            .size   = VK_WHOLE_SIZE,
        };
        /* Coherent memory needs no explicit invalidation */
        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
            continue;
        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                     (++inval_count)*sizeof(*inval_list));
        if (!inval_list)
            return AVERROR(ENOMEM);
        /* Keep the (possibly reallocated) scratch pointer */
        s->scratch = (void *)inval_list;
        inval_list[inval_count - 1] = ival_buf;
    }

    if (inval_count) {
        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
                                               inval_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    return 0;
}

int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
                        int flush)
{
    int err = 0;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkMappedMemoryRange *flush_list = NULL;
    int flush_count = 0;

    if (flush) {
        for (int i = 0; i < nb_buffers; i++) {
            const VkMappedMemoryRange flush_buf = {
                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
                .memory = buf[i].mem,
                .size   = VK_WHOLE_SIZE,
            };
            /* Coherent memory needs no explicit flushing */
            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
                continue;
            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
                                         (++flush_count)*sizeof(*flush_list));
            if (!flush_list)
                return AVERROR(ENOMEM);
            /* Keep the (possibly reallocated) scratch pointer */
            s->scratch = (void *)flush_list;
            flush_list[flush_count - 1] = flush_buf;
        }
    }

    if (flush_count) {
        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
                                          flush_list);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
                   ff_vk_ret2str(ret));
            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
        }
    }

    for (int i = 0; i < nb_buffers; i++)
        vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);

    return err;
}

void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
{
    FFVulkanFunctions *vk = &s->vkfn;

    if (!buf || !s->hwctx)
        return;

    vk->DeviceWaitIdle(s->hwctx->act_dev);

    if (buf->buf != VK_NULL_HANDLE)
        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
    if (buf->mem != VK_NULL_HANDLE)
        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
}

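/* Typical map/write/unmap cycle for the helpers above (sketch, error
 * handling elided). Flushing on unmap makes the write visible to the GPU
 * on non-coherent memory and is skipped internally on coherent memory:
 *
 *     uint8_t *mapped;
 *     ff_vk_map_buffers(s, &buf, &mapped, 1, 0); // 0: no invalidate (write-only)
 *     memcpy(mapped, src_data, src_size);
 *     ff_vk_unmap_buffers(s, &buf, 1, 1);        // 1: flush before unmapping
 */
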
int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
                            VkShaderStageFlagBits stage)
{
    VkPushConstantRange *pc;

    pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
                                       pl->push_consts_num + 1);
    if (!pl->push_consts)
        return AVERROR(ENOMEM);

    pc = &pl->push_consts[pl->push_consts_num++];
    memset(pc, 0, sizeof(*pc));

    pc->stageFlags = stage;
    pc->offset     = offset;
    pc->size       = size;

    return 0;
}

FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
                          FFVkQueueFamilyCtx *qf)
{
    VkResult ret;
    FFVkExecContext *e;
    FFVulkanFunctions *vk = &s->vkfn;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType            = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags            = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex = qf->queue_family,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = qf->nb_queues,
    };

    e = create_exec_ctx(s);
    if (!e)
        return AVERROR(ENOMEM);

    e->qf = qf;

    e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
    if (!e->queues)
        return AVERROR(ENOMEM);

    e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
    if (!e->bufs)
        return AVERROR(ENOMEM);

    /* Create command pool */
    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                                s->hwctx->alloc, &e->pool);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cbuf_create.commandPool = e->pool;

    /* Allocate command buffers, one per queue */
    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    for (int i = 0; i < qf->nb_queues; i++) {
        FFVkQueueCtx *q = &e->queues[i];
        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
                           i % qf->actual_queues, &q->queue);
    }

    *ctx = e;

    return 0;
}

void ff_vk_discard_exec_deps(FFVkExecContext *e)
{
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    for (int j = 0; j < q->nb_buf_deps; j++)
        av_buffer_unref(&q->buf_deps[j]);
    q->nb_buf_deps = 0;

    for (int j = 0; j < q->nb_frame_deps; j++)
        av_frame_free(&q->frame_deps[j]);
    q->nb_frame_deps = 0;

    e->sem_wait_cnt = 0;
    e->sem_sig_cnt = 0;
}

int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    VkCommandBufferBeginInfo cmd_start = {
        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
    };

    /* Create the fence and don't wait for it initially */
    if (!q->fence) {
        VkFenceCreateInfo fence_spawn = {
            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
        };
        ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
                              &q->fence);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Failed to create frame fence: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    } else {
        vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
        vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
    }

    /* Discard queue dependencies */
    ff_vk_discard_exec_deps(e);

    ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to start command recording: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
{
    return e->bufs[e->qf->cur_queue];
}

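/* Lifecycle sketch: an execution context is created once, then reused for
 * every submission on its queue family (names below are illustrative):
 *
 *     FFVkExecContext *exec;
 *     ff_vk_create_exec_ctx(s, &exec, &qf);       // once, at init
 *     // ... then, per submission:
 *     ff_vk_start_exec_recording(s, exec);
 *     VkCommandBuffer cmd_buf = ff_vk_get_exec_buf(exec);
 *     // record commands into cmd_buf here
 *     ff_vk_submit_exec_queue(s, exec);
 */
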
int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
                       VkPipelineStageFlagBits in_wait_dst_flag)
{
    AVFrame **dst;
    AVVkFrame *f = (AVVkFrame *)frame->data[0];
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
    int planes = av_pix_fmt_count_planes(fc->sw_format);

    for (int i = 0; i < planes; i++) {
        e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
                                      (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
        if (!e->sem_wait) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
        if (!e->sem_wait_dst) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc,
                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val));
        if (!e->sem_wait_val) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
                                     (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
        if (!e->sem_sig) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc,
                                         (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val));
        if (!e->sem_sig_val) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
                                             (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
        if (!e->sem_sig_val_dst) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait[e->sem_wait_cnt]     = f->sem[i];
        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
        e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
        e->sem_wait_cnt++;

        e->sem_sig[e->sem_sig_cnt]         = f->sem[i];
        e->sem_sig_val[e->sem_sig_cnt]     = f->sem_value[i] + 1;
        e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
        e->sem_sig_cnt++;
    }

    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
                          (q->nb_frame_deps + 1) * sizeof(*dst));
    if (!dst) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }

    q->frame_deps = dst;
    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
    if (!q->frame_deps[q->nb_frame_deps]) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }
    q->nb_frame_deps++;

    return 0;
}

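/* Note on synchronization: every plane of an AVVkFrame carries a timeline
 * semaphore. ff_vk_add_exec_dep() above queues a wait on each semaphore's
 * current value and a signal of value + 1; ff_vk_submit_exec_queue() below
 * then bumps the stored per-plane values once the submission is in flight,
 * so later users of the frame order themselves after this command buffer. */
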
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
        .sType                     = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
        .pWaitSemaphoreValues      = e->sem_wait_val,
        .pSignalSemaphoreValues    = e->sem_sig_val,
        .waitSemaphoreValueCount   = e->sem_wait_cnt,
        .signalSemaphoreValueCount = e->sem_sig_cnt,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext                = &s_timeline_sem_info,

        .commandBufferCount   = 1,
        .pCommandBuffers      = &e->bufs[e->qf->cur_queue],

        .pWaitSemaphores      = e->sem_wait,
        .pWaitDstStageMask    = e->sem_wait_dst,
        .waitSemaphoreCount   = e->sem_wait_cnt,

        .pSignalSemaphores    = e->sem_sig,
        .signalSemaphoreCount = e->sem_sig_cnt,
    };

    ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    for (int i = 0; i < e->sem_sig_cnt; i++)
        *e->sem_sig_val_dst[i] += 1;

    return 0;
}

int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
                           AVBufferRef **deps, int nb_deps)
{
    AVBufferRef **dst;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    if (!deps || !nb_deps)
        return 0;

    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
    if (!dst)
        goto err;

    q->buf_deps = dst;

    for (int i = 0; i < nb_deps; i++) {
        q->buf_deps[q->nb_buf_deps] = deps[i];
        if (!q->buf_deps[q->nb_buf_deps])
            goto err;
        q->nb_buf_deps++;
    }

    return 0;

err:
    ff_vk_discard_exec_deps(e);
    return AVERROR(ENOMEM);
}

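/* Sketch: handing a buffer reference over to the execution context, so it
 * stays alive until the next submission on this queue has been waited on
 * ("some_ref" is a hypothetical pre-existing AVBufferRef):
 *
 *     AVBufferRef *ref = av_buffer_ref(some_ref);
 *     if (!ref)
 *         return AVERROR(ENOMEM);
 *     err = ff_vk_add_dep_exec_ctx(s, e, &ref, 1); // ctx owns ref on success
 */
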
FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
                                int unnorm_coords, VkFilter filt)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkSamplerCreateInfo sampler_info = {
        .sType                   = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
        .magFilter               = filt,
        .minFilter               = sampler_info.magFilter,
        .mipmapMode              = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
                                                   VK_SAMPLER_MIPMAP_MODE_LINEAR,
        .addressModeU            = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
        .addressModeV            = sampler_info.addressModeU,
        .addressModeW            = sampler_info.addressModeU,
        .anisotropyEnable        = VK_FALSE,
        .compareOp               = VK_COMPARE_OP_NEVER,
        .borderColor             = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
        .unnormalizedCoordinates = unnorm_coords,
    };

    FFVkSampler *sctx = create_sampler(s);
    if (!sctx)
        return NULL;

    ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
                            s->hwctx->alloc, &sctx->sampler[0]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
               ff_vk_ret2str(ret));
        return NULL;
    }

    /* The same sampler is used for all four possible planes */
    for (int i = 1; i < 4; i++)
        sctx->sampler[i] = sctx->sampler[0];

    return sctx;
}

int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
{
    if (pix_fmt == AV_PIX_FMT_ABGR   || pix_fmt == AV_PIX_FMT_BGRA   ||
        pix_fmt == AV_PIX_FMT_RGBA   || pix_fmt == AV_PIX_FMT_RGB24  ||
        pix_fmt == AV_PIX_FMT_BGR24  || pix_fmt == AV_PIX_FMT_RGB48  ||
        pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
        pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0   ||
        pix_fmt == AV_PIX_FMT_0BGR   || pix_fmt == AV_PIX_FMT_RGB0)
        return 1;
    return 0;
}

const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
    const int high = desc->comp[0].depth > 8;
    return high ? "rgba16f" : "rgba8";
}

typedef struct ImageViewCtx {
    VkImageView view;
} ImageViewCtx;

static void destroy_imageview(void *opaque, uint8_t *data)
{
    FFVulkanContext *s = opaque;
    FFVulkanFunctions *vk = &s->vkfn;
    ImageViewCtx *iv = (ImageViewCtx *)data;

    vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
    av_free(iv);
}

int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
                           VkImageView *v, VkImage img, VkFormat fmt,
                           const VkComponentMapping map)
{
    int err;
    VkResult ret;
    AVBufferRef *buf;
    FFVulkanFunctions *vk = &s->vkfn;
    ImageViewCtx *iv;

    VkImageViewCreateInfo imgview_spawn = {
        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
        .pNext      = NULL,
        .image      = img,
        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
        .format     = fmt,
        .components = map,
        .subresourceRange = {
            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
            .baseMipLevel   = 0,
            .levelCount     = 1,
            .baseArrayLayer = 0,
            .layerCount     = 1,
        },
    };

    iv = av_mallocz(sizeof(*iv));
    if (!iv)
        return AVERROR(ENOMEM);

    ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
                              s->hwctx->alloc, &iv->view);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
               ff_vk_ret2str(ret));
        av_free(iv);
        return AVERROR_EXTERNAL;
    }

    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
    if (!buf) {
        destroy_imageview(s, (uint8_t *)iv);
        return AVERROR(ENOMEM);
    }

    /* Add to queue dependencies */
    err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
    if (err) {
        av_buffer_unref(&buf);
        return err;
    }

    *v = iv->view;

    return 0;
}

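/* Sketch: creating a view over one plane of an AVVkFrame inside a recording
 * session; av_vkfmt_from_pixfmt() is from hwcontext_vulkan.h and yields one
 * VkFormat per plane:
 *
 *     VkImageView view;
 *     err = ff_vk_create_imageview(s, e, &view, f->img[i],
 *                                  av_vkfmt_from_pixfmt(fc->sw_format)[i],
 *                                  ff_comp_identity_map);
 *
 * The view's lifetime is tied to the execution context, so it is destroyed
 * only after the submission that uses it has completed. */
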
FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)
FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
                                   VkShaderStageFlags stage)
{
    FFVkSPIRVShader *shd = create_shader(pl);
    if (!shd)
        return NULL;

    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;

    shd->name = name;

    GLSLF(0, #version %i ,460);
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y)) );
    GLSLC(0, );

    return shd;
}

void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
{
    shd->local_size[0] = local_size[0];
    shd->local_size[1] = local_size[1];
    shd->local_size[2] = local_size[2];

    av_bprintf(&shd->src, "layout (local_size_x = %i, "
               "local_size_y = %i, local_size_z = %i) in;\n\n",
               shd->local_size[0], shd->local_size[1], shd->local_size[2]);
}

void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
{
    int line = 0;
    const char *p = shd->src.str;
    const char *start = p;
    const size_t len = strlen(p);

    AVBPrint buf;
    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);

    /* Print the shader source with line numbers prepended, to ease debugging */
    for (int i = 0; i < len; i++) {
        if (p[i] == '\n') {
            av_bprintf(&buf, "%i\t", ++line);
            av_bprint_append_data(&buf, start, &p[i] - start + 1);
            start = &p[i + 1];
        }
    }

    av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str);
    av_bprint_finalize(&buf, NULL);
}

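/* Sketch of how a compute shader is authored with these helpers; GLSLC()
 * and GLSLF() come from vulkan.h and append (formatted) source lines to
 * shd->src. The shader name is illustrative:
 *
 *     FFVkSPIRVShader *shd = ff_vk_init_shader(pl, "blur_compute",
 *                                              VK_SHADER_STAGE_COMPUTE_BIT);
 *     ff_vk_set_compute_shader_sizes(shd, (int [3]){ 32, 32, 1 });
 *     GLSLC(0, void main()                                      );
 *     GLSLC(0, {                                                );
 *     GLSLC(1,     ivec2 pos = ivec2(gl_GlobalInvocationID.xy); );
 *     GLSLC(0, }                                                );
 */
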
int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
                         const char *entrypoint)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkShaderModuleCreateInfo shader_create;
    uint8_t *spirv;
    size_t spirv_size;
    void *priv;

    shd->shader.pName = entrypoint;

    if (!s->spirv_compiler) {
#if CONFIG_LIBGLSLANG
        s->spirv_compiler = ff_vk_glslang_init();
#elif CONFIG_LIBSHADERC
        s->spirv_compiler = ff_vk_shaderc_init();
#else
        return AVERROR(ENOSYS);
#endif
        if (!s->spirv_compiler)
            return AVERROR(ENOMEM);
    }

    err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv,
                                            &spirv_size, entrypoint, &priv);
    if (err < 0)
        return err;

    av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
           shd->name, spirv_size);

    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = spirv_size;
    shader_create.flags    = 0;
    shader_create.pCode    = (void *)spirv;

    ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                                 &shd->shader.module);

    s->spirv_compiler->free_shader(s->spirv_compiler, &priv);

    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}

static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;
    int is_uniform;
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), NULL,            1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};

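/* Sketch: declaring bindings for a typical one-input/one-output compute
 * filter. The names and the "planes"/"sampler"/updater variables are
 * illustrative; real callers fill these from their own state:
 *
 *     FFVulkanDescriptorSetBinding desc[] = {
 *         {
 *             .name       = "input_img",
 *             .type       = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
 *             .dimensions = 2,
 *             .elems      = planes,
 *             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
 *             .updater    = input_images,
 *             .sampler    = sampler,
 *         },
 *         {
 *             .name       = "output_img",
 *             .type       = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
 *             .mem_layout = ff_vk_shader_rep_fmt(output_format),
 *             .mem_quali  = "writeonly",
 *             .dimensions = 2,
 *             .elems      = planes,
 *             .stages     = VK_SHADER_STAGE_COMPUTE_BIT,
 *             .updater    = output_images,
 *         },
 *     };
 *     err = ff_vk_add_descriptor_set(s, pl, shd, desc, FF_ARRAY_ELEMS(desc), 0);
 */
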
int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                             FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
                             int num, int only_print_to_shader)
{
    VkResult ret;
    VkDescriptorSetLayout *layout;
    FFVulkanFunctions *vk = &s->vkfn;

    if (only_print_to_shader)
        goto print;

    pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
                                       pl->desc_layout_num + pl->qf->nb_queues);
    if (!pl->desc_layout)
        return AVERROR(ENOMEM);

    pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized,
                                                sizeof(*pl->desc_set_initialized),
                                                pl->descriptor_sets_num + 1);
    if (!pl->desc_set_initialized)
        return AVERROR(ENOMEM);

    pl->desc_set_initialized[pl->descriptor_sets_num] = 0;
    layout = &pl->desc_layout[pl->desc_layout_num];

    { /* Create descriptor set layout descriptions */
        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
        VkDescriptorSetLayoutBinding *desc_binding;

        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
        if (!desc_binding)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            desc_binding[i].binding            = i;
            desc_binding[i].descriptorType     = desc[i].type;
            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
            desc_binding[i].stageFlags         = desc[i].stages;
            desc_binding[i].pImmutableSamplers = desc[i].sampler ?
                                                 desc[i].sampler->sampler :
                                                 NULL;
        }

        desc_create_layout.sType        = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        desc_create_layout.pBindings    = desc_binding;
        desc_create_layout.bindingCount = num;

        /* One layout copy per queue, so each queue can own its own set */
        for (int i = 0; i < pl->qf->nb_queues; i++) {
            ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                                s->hwctx->alloc, &layout[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                       "layout: %s\n", ff_vk_ret2str(ret));
                av_free(desc_binding);
                return AVERROR_EXTERNAL;
            }
        }

        av_free(desc_binding);
    }

    { /* Pool each descriptor by type and update pool counts */
        for (int i = 0; i < num; i++) {
            int j;
            for (j = 0; j < pl->pool_size_desc_num; j++)
                if (pl->pool_size_desc[j].type == desc[i].type)
                    break;
            if (j >= pl->pool_size_desc_num) {
                pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
                                                      sizeof(*pl->pool_size_desc),
                                                      ++pl->pool_size_desc_num);
                if (!pl->pool_size_desc)
                    return AVERROR(ENOMEM);
                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
            }
            pl->pool_size_desc[j].type             = desc[i].type;
            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
        }
    }

    { /* Create template creation struct */
        VkDescriptorUpdateTemplateCreateInfo *dt;
        VkDescriptorUpdateTemplateEntry *des_entries;

        /* Freed after descriptor set initialization */
        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
        if (!des_entries)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            des_entries[i].dstBinding      = i;
            des_entries[i].descriptorType  = desc[i].type;
            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
            des_entries[i].dstArrayElement = 0;
            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
        }

        pl->desc_template_info = av_realloc_array(pl->desc_template_info,
                                                  sizeof(*pl->desc_template_info),
                                                  pl->total_descriptor_sets + pl->qf->nb_queues);
        if (!pl->desc_template_info)
            return AVERROR(ENOMEM);

        dt = &pl->desc_template_info[pl->total_descriptor_sets];
        memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);

        for (int i = 0; i < pl->qf->nb_queues; i++) {
            dt[i].sType                      = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
            dt[i].templateType               = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
            dt[i].descriptorSetLayout        = layout[i];
            dt[i].pDescriptorUpdateEntries   = des_entries;
            dt[i].descriptorUpdateEntryCount = num;
        }
    }

    pl->descriptor_sets_num++;

    pl->desc_layout_num       += pl->qf->nb_queues;
    pl->total_descriptor_sets += pl->qf->nb_queues;

print:
    /* Write shader info */
    for (int i = 0; i < num; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type)
            GLSLA(" %s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n    %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";\n");
    }
    GLSLA("\n");

    return 0;
}

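/* The update-template entries above record each binding's updater as a byte
 * offset relative to the FFVulkanContext itself. That is why the
 * UpdateDescriptorSetWithTemplate() calls below pass `s` as the data
 * pointer: the driver reads every descriptor straight out of the context's
 * updater structs, with no per-binding VkWriteDescriptorSet boilerplate. */
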
void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                                 int set_id)
{
    FFVulkanFunctions *vk = &s->vkfn;

    /* If a set has never been updated, update all queues' sets. */
    if (!pl->desc_set_initialized[set_id]) {
        for (int i = 0; i < pl->qf->nb_queues; i++) {
            int idx = set_id*pl->qf->nb_queues + i;
            vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                                pl->desc_set[idx],
                                                pl->desc_template[idx],
                                                s);
        }
        pl->desc_set_initialized[set_id] = 1;
        return;
    }

    set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;

    vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
                                        pl->desc_set[set_id],
                                        pl->desc_template[set_id],
                                        s);
}

void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
                            VkShaderStageFlagBits stage, int offset,
                            size_t size, void *src)
{
    FFVulkanFunctions *vk = &s->vkfn;

    vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
                         stage, offset, size, src);
}

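/* Build order for a pipeline, as the functions below expect it (sketch;
 * the shader name and entrypoint are illustrative):
 *
 *     FFVulkanPipeline *pl = ff_vk_create_pipeline(s, &qf);
 *     FFVkSPIRVShader *shd = ff_vk_init_shader(pl, "main_shader",
 *                                              VK_SHADER_STAGE_COMPUTE_BIT);
 *     // ... ff_vk_add_descriptor_set()/ff_vk_add_push_constant() calls ...
 *     ff_vk_compile_shader(s, shd, "main");
 *     ff_vk_init_pipeline_layout(s, pl);
 *     ff_vk_init_compute_pipeline(s, pl);
 */
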
int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
    if (!pl->desc_staging)
        return AVERROR(ENOMEM);

    { /* Init descriptor set pool */
        VkDescriptorPoolCreateInfo pool_create_info = {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .poolSizeCount = pl->pool_size_desc_num,
            .pPoolSizes    = pl->pool_size_desc,
            .maxSets       = pl->total_descriptor_sets,
        };

        ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                       s->hwctx->alloc, &pl->desc_pool);
        av_freep(&pl->pool_size_desc);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                   "pool: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Allocate descriptor sets */
        VkDescriptorSetAllocateInfo alloc_info = {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = pl->desc_pool,
            .descriptorSetCount = pl->total_descriptor_sets,
            .pSetLayouts        = pl->desc_layout,
        };

        pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
        if (!pl->desc_set)
            return AVERROR(ENOMEM);

        ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
                                         pl->desc_set);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Finally create the pipeline layout */
        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
            .pSetLayouts            = (VkDescriptorSetLayout *)pl->desc_staging,
            .pushConstantRangeCount = pl->push_consts_num,
            .pPushConstantRanges    = pl->push_consts,
        };

        /* Stage one layout per set; the rest are per-queue duplicates */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
            pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];

        ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                       s->hwctx->alloc, &pl->pipeline_layout);
        av_freep(&pl->push_consts);
        pl->push_consts_num = 0;
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Descriptor template (for tightly packed descriptors) */
        VkDescriptorUpdateTemplateCreateInfo *dt;

        pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
        if (!pl->desc_template)
            return AVERROR(ENOMEM);

        /* Create update templates for the descriptor sets */
        for (int i = 0; i < pl->total_descriptor_sets; i++) {
            dt = &pl->desc_template_info[i];
            dt->pipelineLayout = pl->pipeline_layout;
            ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                     dt, s->hwctx->alloc,
                                                     &pl->desc_template[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
                       "template: %s\n", ff_vk_ret2str(ret));
                return AVERROR_EXTERNAL;
            }
        }

        /* Free the duplicated memory used for the template entries */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
            dt = &pl->desc_template_info[i];
            av_free((void *)dt->pDescriptorUpdateEntries);
        }

        av_freep(&pl->desc_template_info);
    }

    return 0;
}

FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
{
    FFVulkanPipeline *pl = create_pipeline(s);
    if (pl)
        pl->qf = qf;

    return pl;
}

int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    int i;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    VkComputePipelineCreateInfo pipe = {
        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
        .layout = pl->pipeline_layout,
    };

    for (i = 0; i < pl->shaders_num; i++) {
        if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
            pipe.stage = pl->shaders[i]->shader;
            break;
        }
    }
    if (i == pl->shaders_num) {
        av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
        return AVERROR(EINVAL);
    }

    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
                                     s->hwctx->alloc, &pl->pipeline);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;

    return 0;
}

void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
                              FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;

    vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);

    /* Bind the current queue's copy of each descriptor set */
    for (int i = 0; i < pl->descriptor_sets_num; i++)
        pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];

    vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
                              pl->pipeline_layout, 0,
                              pl->descriptor_sets_num,
                              (VkDescriptorSet *)pl->desc_staging,
                              0, NULL);

    e->bound_pl = pl;
}

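/* Putting it all together: one pass per frame might look like this (sketch;
 * the workgroup math and the push-constant struct "pc" are illustrative):
 *
 *     ff_vk_start_exec_recording(s, exec);
 *     ff_vk_update_descriptor_set(s, pl, 0);
 *     ff_vk_bind_pipeline_exec(s, exec, pl);
 *     ff_vk_update_push_exec(s, exec, VK_SHADER_STAGE_COMPUTE_BIT,
 *                            0, sizeof(pc), &pc);
 *     vk->CmdDispatch(ff_vk_get_exec_buf(exec),
 *                     FFALIGN(w, 32)/32, FFALIGN(h, 32)/32, 1);
 *     ff_vk_submit_exec_queue(s, exec);
 *     ff_vk_qf_rotate(&qf);
 */
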
static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
{
    FFVulkanFunctions *vk = &s->vkfn;

    /* Make sure all queues have finished executing */
    for (int i = 0; i < e->qf->nb_queues; i++) {
        FFVkQueueCtx *q = &e->queues[i];

        if (q->fence) {
            vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
            vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
        }

        /* Free the fence */
        if (q->fence)
            vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);

        /* Free buffer dependencies */
        for (int j = 0; j < q->nb_buf_deps; j++)
            av_buffer_unref(&q->buf_deps[j]);
        av_free(q->buf_deps);

        /* Free frame dependencies */
        for (int j = 0; j < q->nb_frame_deps; j++)
            av_frame_free(&q->frame_deps[j]);
        av_free(q->frame_deps);
    }

    if (e->bufs)
        vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
    if (e->pool)
        vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);

    av_freep(&e->bufs);
    av_freep(&e->queues);
    av_freep(&e->sem_sig);
    av_freep(&e->sem_sig_val);
    av_freep(&e->sem_sig_val_dst);
    av_freep(&e->sem_wait);
    av_freep(&e->sem_wait_dst);
    av_freep(&e->sem_wait_val);
    av_free(e);
}

static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;

    for (int i = 0; i < pl->shaders_num; i++) {
        FFVkSPIRVShader *shd = pl->shaders[i];
        av_bprint_finalize(&shd->src, NULL);
        vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
                                s->hwctx->alloc);
        av_free(shd);
    }

    vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                              s->hwctx->alloc);

    for (int i = 0; i < pl->desc_layout_num; i++) {
        if (pl->desc_template && pl->desc_template[i])
            vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
                                                s->hwctx->alloc);
        if (pl->desc_layout && pl->desc_layout[i])
            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
                                           s->hwctx->alloc);
    }

    /* Also frees the descriptor sets */
    if (pl->desc_pool)
        vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                  s->hwctx->alloc);

    av_freep(&pl->desc_staging);
    av_freep(&pl->desc_set);
    av_freep(&pl->shaders);
    av_freep(&pl->desc_layout);
    av_freep(&pl->desc_template);
    av_freep(&pl->desc_set_initialized);
    av_freep(&pl->push_consts);
    pl->push_consts_num = 0;

    /* Only freed in case of failure */
    av_freep(&pl->pool_size_desc);
    if (pl->desc_template_info) {
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
            VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
            av_free((void *)dt->pDescriptorUpdateEntries);
        }
        av_freep(&pl->desc_template_info);
    }

    av_free(pl);
}

void ff_vk_uninit(FFVulkanContext *s)
{
    FFVulkanFunctions *vk = &s->vkfn;

    if (s->spirv_compiler)
        s->spirv_compiler->uninit(&s->spirv_compiler);

    for (int i = 0; i < s->exec_ctx_num; i++)
        free_exec_ctx(s, s->exec_ctx[i]);
    av_freep(&s->exec_ctx);

    for (int i = 0; i < s->samplers_num; i++) {
        vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
                           s->hwctx->alloc);
        av_free(s->samplers[i]);
    }
    av_freep(&s->samplers);

    for (int i = 0; i < s->pipelines_num; i++)
        free_pipeline(s, s->pipelines[i]);
    av_freep(&s->pipelines);

    av_freep(&s->scratch);
    s->scratch_size = 0;

    av_buffer_unref(&s->device_ref);
    av_buffer_unref(&s->frames_ref);
}