xref: /third_party/ffmpeg/libavutil/vulkan.c (revision cabdff1a)
1/*
2 * This file is part of FFmpeg.
3 *
4 * FFmpeg is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * FFmpeg is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with FFmpeg; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 */
18
19#include "avassert.h"
20
21#include "vulkan.h"
22#include "vulkan_loader.h"
23
24#if CONFIG_LIBGLSLANG
25#include "vulkan_glslang.c"
26#elif CONFIG_LIBSHADERC
27#include "vulkan_shaderc.c"
28#endif
29
/* Generic macro for creating contexts which need to keep their addresses
 * if another context is created.
 *
 * Expands to a static function create_<shortname>() which allocates a new
 * zeroed 'type', grows dctx->array by one slot via av_realloc_array(), stores
 * the new context's pointer in it and increments dctx->num.
 * Returns the new context, or NULL on allocation failure (in which case
 * dctx->array and dctx->num are left untouched). */
#define FN_CREATING(ctx, type, shortname, array, num)                          \
static av_always_inline type *create_ ##shortname(ctx *dctx)                   \
{                                                                              \
    type **array, *sctx = av_mallocz(sizeof(*sctx));                           \
    if (!sctx)                                                                 \
        return NULL;                                                           \
                                                                               \
    array = av_realloc_array(dctx->array, sizeof(*dctx->array), dctx->num + 1);\
    if (!array) {                                                              \
        av_free(sctx);                                                         \
        return NULL;                                                           \
    }                                                                          \
                                                                               \
    dctx->array = array;                                                       \
    dctx->array[dctx->num++] = sctx;                                           \
                                                                               \
    return sctx;                                                               \
}
50
/* Identity swizzle: each channel of an image view maps to itself.
 * Shared default for image view creation throughout this file. */
const VkComponentMapping ff_comp_identity_map = {
    .r = VK_COMPONENT_SWIZZLE_IDENTITY,
    .g = VK_COMPONENT_SWIZZLE_IDENTITY,
    .b = VK_COMPONENT_SWIZZLE_IDENTITY,
    .a = VK_COMPONENT_SWIZZLE_IDENTITY,
};
57
58/* Converts return values to strings */
59const char *ff_vk_ret2str(VkResult res)
60{
61#define CASE(VAL) case VAL: return #VAL
62    switch (res) {
63    CASE(VK_SUCCESS);
64    CASE(VK_NOT_READY);
65    CASE(VK_TIMEOUT);
66    CASE(VK_EVENT_SET);
67    CASE(VK_EVENT_RESET);
68    CASE(VK_INCOMPLETE);
69    CASE(VK_ERROR_OUT_OF_HOST_MEMORY);
70    CASE(VK_ERROR_OUT_OF_DEVICE_MEMORY);
71    CASE(VK_ERROR_INITIALIZATION_FAILED);
72    CASE(VK_ERROR_DEVICE_LOST);
73    CASE(VK_ERROR_MEMORY_MAP_FAILED);
74    CASE(VK_ERROR_LAYER_NOT_PRESENT);
75    CASE(VK_ERROR_EXTENSION_NOT_PRESENT);
76    CASE(VK_ERROR_FEATURE_NOT_PRESENT);
77    CASE(VK_ERROR_INCOMPATIBLE_DRIVER);
78    CASE(VK_ERROR_TOO_MANY_OBJECTS);
79    CASE(VK_ERROR_FORMAT_NOT_SUPPORTED);
80    CASE(VK_ERROR_FRAGMENTED_POOL);
81    CASE(VK_ERROR_SURFACE_LOST_KHR);
82    CASE(VK_ERROR_NATIVE_WINDOW_IN_USE_KHR);
83    CASE(VK_SUBOPTIMAL_KHR);
84    CASE(VK_ERROR_OUT_OF_DATE_KHR);
85    CASE(VK_ERROR_INCOMPATIBLE_DISPLAY_KHR);
86    CASE(VK_ERROR_VALIDATION_FAILED_EXT);
87    CASE(VK_ERROR_INVALID_SHADER_NV);
88    CASE(VK_ERROR_OUT_OF_POOL_MEMORY);
89    CASE(VK_ERROR_INVALID_EXTERNAL_HANDLE);
90    CASE(VK_ERROR_NOT_PERMITTED_EXT);
91    default: return "Unknown error";
92    }
93#undef CASE
94}
95
96void ff_vk_qf_init(FFVulkanContext *s, FFVkQueueFamilyCtx *qf,
97                   VkQueueFlagBits dev_family, int nb_queues)
98{
99    switch (dev_family) {
100    case VK_QUEUE_GRAPHICS_BIT:
101        qf->queue_family = s->hwctx->queue_family_index;
102        qf->actual_queues = s->hwctx->nb_graphics_queues;
103        break;
104    case VK_QUEUE_COMPUTE_BIT:
105        qf->queue_family = s->hwctx->queue_family_comp_index;
106        qf->actual_queues = s->hwctx->nb_comp_queues;
107        break;
108    case VK_QUEUE_TRANSFER_BIT:
109        qf->queue_family = s->hwctx->queue_family_tx_index;
110        qf->actual_queues = s->hwctx->nb_tx_queues;
111        break;
112    case VK_QUEUE_VIDEO_ENCODE_BIT_KHR:
113        qf->queue_family = s->hwctx->queue_family_encode_index;
114        qf->actual_queues = s->hwctx->nb_encode_queues;
115        break;
116    case VK_QUEUE_VIDEO_DECODE_BIT_KHR:
117        qf->queue_family = s->hwctx->queue_family_decode_index;
118        qf->actual_queues = s->hwctx->nb_decode_queues;
119        break;
120    default:
121        av_assert0(0); /* Should never happen */
122    }
123
124    if (!nb_queues)
125        qf->nb_queues = qf->actual_queues;
126    else
127        qf->nb_queues = nb_queues;
128
129    return;
130}
131
132void ff_vk_qf_rotate(FFVkQueueFamilyCtx *qf)
133{
134    qf->cur_queue = (qf->cur_queue + 1) % qf->nb_queues;
135}
136
137static int vk_alloc_mem(FFVulkanContext *s, VkMemoryRequirements *req,
138                        VkMemoryPropertyFlagBits req_flags, void *alloc_extension,
139                        VkMemoryPropertyFlagBits *mem_flags, VkDeviceMemory *mem)
140{
141    VkResult ret;
142    int index = -1;
143    FFVulkanFunctions *vk = &s->vkfn;
144
145    VkMemoryAllocateInfo alloc_info = {
146        .sType           = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
147        .pNext           = alloc_extension,
148    };
149
150    /* Align if we need to */
151    if (req_flags & VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)
152        req->size = FFALIGN(req->size, s->props.limits.minMemoryMapAlignment);
153
154    alloc_info.allocationSize = req->size;
155
156    /* The vulkan spec requires memory types to be sorted in the "optimal"
157     * order, so the first matching type we find will be the best/fastest one */
158    for (int i = 0; i < s->mprops.memoryTypeCount; i++) {
159        /* The memory type must be supported by the requirements (bitfield) */
160        if (!(req->memoryTypeBits & (1 << i)))
161            continue;
162
163        /* The memory type flags must include our properties */
164        if ((s->mprops.memoryTypes[i].propertyFlags & req_flags) != req_flags)
165            continue;
166
167        /* Found a suitable memory type */
168        index = i;
169        break;
170    }
171
172    if (index < 0) {
173        av_log(s, AV_LOG_ERROR, "No memory type found for flags 0x%x\n",
174               req_flags);
175        return AVERROR(EINVAL);
176    }
177
178    alloc_info.memoryTypeIndex = index;
179
180    ret = vk->AllocateMemory(s->hwctx->act_dev, &alloc_info,
181                             s->hwctx->alloc, mem);
182    if (ret != VK_SUCCESS) {
183        av_log(s, AV_LOG_ERROR, "Failed to allocate memory: %s\n",
184               ff_vk_ret2str(ret));
185        return AVERROR(ENOMEM);
186    }
187
188    *mem_flags |= s->mprops.memoryTypes[index].propertyFlags;
189
190    return 0;
191}
192
193int ff_vk_create_buf(FFVulkanContext *s, FFVkBuffer *buf, size_t size,
194                     VkBufferUsageFlags usage, VkMemoryPropertyFlagBits flags)
195{
196    int err;
197    VkResult ret;
198    int use_ded_mem;
199    FFVulkanFunctions *vk = &s->vkfn;
200
201    VkBufferCreateInfo buf_spawn = {
202        .sType       = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
203        .pNext       = NULL,
204        .usage       = usage,
205        .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
206        .size        = size, /* Gets FFALIGNED during alloc if host visible
207                                but should be ok */
208    };
209
210    VkBufferMemoryRequirementsInfo2 req_desc = {
211        .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
212    };
213    VkMemoryDedicatedAllocateInfo ded_alloc = {
214        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO,
215        .pNext = NULL,
216    };
217    VkMemoryDedicatedRequirements ded_req = {
218        .sType = VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS,
219    };
220    VkMemoryRequirements2 req = {
221        .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
222        .pNext = &ded_req,
223    };
224
225    ret = vk->CreateBuffer(s->hwctx->act_dev, &buf_spawn, NULL, &buf->buf);
226    if (ret != VK_SUCCESS) {
227        av_log(s, AV_LOG_ERROR, "Failed to create buffer: %s\n",
228               ff_vk_ret2str(ret));
229        return AVERROR_EXTERNAL;
230    }
231
232    req_desc.buffer = buf->buf;
233
234    vk->GetBufferMemoryRequirements2(s->hwctx->act_dev, &req_desc, &req);
235
236    /* In case the implementation prefers/requires dedicated allocation */
237    use_ded_mem = ded_req.prefersDedicatedAllocation |
238                  ded_req.requiresDedicatedAllocation;
239    if (use_ded_mem)
240        ded_alloc.buffer = buf->buf;
241
242    err = vk_alloc_mem(s, &req.memoryRequirements, flags,
243                       use_ded_mem ? &ded_alloc : (void *)ded_alloc.pNext,
244                       &buf->flags, &buf->mem);
245    if (err)
246        return err;
247
248    ret = vk->BindBufferMemory(s->hwctx->act_dev, buf->buf, buf->mem, 0);
249    if (ret != VK_SUCCESS) {
250        av_log(s, AV_LOG_ERROR, "Failed to bind memory to buffer: %s\n",
251               ff_vk_ret2str(ret));
252        return AVERROR_EXTERNAL;
253    }
254
255    return 0;
256}
257
258int ff_vk_map_buffers(FFVulkanContext *s, FFVkBuffer *buf, uint8_t *mem[],
259                      int nb_buffers, int invalidate)
260{
261    VkResult ret;
262    FFVulkanFunctions *vk = &s->vkfn;
263    VkMappedMemoryRange *inval_list = NULL;
264    int inval_count = 0;
265
266    for (int i = 0; i < nb_buffers; i++) {
267        ret = vk->MapMemory(s->hwctx->act_dev, buf[i].mem, 0,
268                            VK_WHOLE_SIZE, 0, (void **)&mem[i]);
269        if (ret != VK_SUCCESS) {
270            av_log(s, AV_LOG_ERROR, "Failed to map buffer memory: %s\n",
271                   ff_vk_ret2str(ret));
272            return AVERROR_EXTERNAL;
273        }
274    }
275
276    if (!invalidate)
277        return 0;
278
279    for (int i = 0; i < nb_buffers; i++) {
280        const VkMappedMemoryRange ival_buf = {
281            .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
282            .memory = buf[i].mem,
283            .size   = VK_WHOLE_SIZE,
284        };
285        if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
286            continue;
287        inval_list = av_fast_realloc(s->scratch, &s->scratch_size,
288                                     (++inval_count)*sizeof(*inval_list));
289        if (!inval_list)
290            return AVERROR(ENOMEM);
291        inval_list[inval_count - 1] = ival_buf;
292    }
293
294    if (inval_count) {
295        ret = vk->InvalidateMappedMemoryRanges(s->hwctx->act_dev, inval_count,
296                                               inval_list);
297        if (ret != VK_SUCCESS) {
298            av_log(s, AV_LOG_ERROR, "Failed to invalidate memory: %s\n",
299                   ff_vk_ret2str(ret));
300            return AVERROR_EXTERNAL;
301        }
302    }
303
304    return 0;
305}
306
307int ff_vk_unmap_buffers(FFVulkanContext *s, FFVkBuffer *buf, int nb_buffers,
308                        int flush)
309{
310    int err = 0;
311    VkResult ret;
312    FFVulkanFunctions *vk = &s->vkfn;
313    VkMappedMemoryRange *flush_list = NULL;
314    int flush_count = 0;
315
316    if (flush) {
317        for (int i = 0; i < nb_buffers; i++) {
318            const VkMappedMemoryRange flush_buf = {
319                .sType  = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
320                .memory = buf[i].mem,
321                .size   = VK_WHOLE_SIZE,
322            };
323            if (buf[i].flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT)
324                continue;
325            flush_list = av_fast_realloc(s->scratch, &s->scratch_size,
326                                         (++flush_count)*sizeof(*flush_list));
327            if (!flush_list)
328                return AVERROR(ENOMEM);
329            flush_list[flush_count - 1] = flush_buf;
330        }
331    }
332
333    if (flush_count) {
334        ret = vk->FlushMappedMemoryRanges(s->hwctx->act_dev, flush_count,
335                                          flush_list);
336        if (ret != VK_SUCCESS) {
337            av_log(s, AV_LOG_ERROR, "Failed to flush memory: %s\n",
338                   ff_vk_ret2str(ret));
339            err = AVERROR_EXTERNAL; /* We still want to try to unmap them */
340        }
341    }
342
343    for (int i = 0; i < nb_buffers; i++)
344        vk->UnmapMemory(s->hwctx->act_dev, buf[i].mem);
345
346    return err;
347}
348
349void ff_vk_free_buf(FFVulkanContext *s, FFVkBuffer *buf)
350{
351    FFVulkanFunctions *vk = &s->vkfn;
352
353    if (!buf || !s->hwctx)
354        return;
355
356    vk->DeviceWaitIdle(s->hwctx->act_dev);
357
358    if (buf->buf != VK_NULL_HANDLE)
359        vk->DestroyBuffer(s->hwctx->act_dev, buf->buf, s->hwctx->alloc);
360    if (buf->mem != VK_NULL_HANDLE)
361        vk->FreeMemory(s->hwctx->act_dev, buf->mem, s->hwctx->alloc);
362}
363
364int ff_vk_add_push_constant(FFVulkanPipeline *pl, int offset, int size,
365                            VkShaderStageFlagBits stage)
366{
367    VkPushConstantRange *pc;
368
369    pl->push_consts = av_realloc_array(pl->push_consts, sizeof(*pl->push_consts),
370                                       pl->push_consts_num + 1);
371    if (!pl->push_consts)
372        return AVERROR(ENOMEM);
373
374    pc = &pl->push_consts[pl->push_consts_num++];
375    memset(pc, 0, sizeof(*pc));
376
377    pc->stageFlags = stage;
378    pc->offset = offset;
379    pc->size = size;
380
381    return 0;
382}
383
/* Generates create_exec_ctx(): appends a new FFVkExecContext to s->exec_ctx
 * and returns it (NULL on allocation failure). */
FN_CREATING(FFVulkanContext, FFVkExecContext, exec_ctx, exec_ctx, exec_ctx_num)
/**
 * Creates an execution context for the given queue family: one command pool
 * with resettable command buffers, plus one command buffer and one queue
 * handle per context queue (context queues map onto the family's actual
 * queues round-robin).
 *
 * The context is registered in s->exec_ctx; on error paths the partially
 * initialized context stays registered there, presumably to be cleaned up on
 * context uninit (not visible in this file — TODO confirm).
 *
 * @return 0 on success, a negative AVERROR otherwise
 */
int ff_vk_create_exec_ctx(FFVulkanContext *s, FFVkExecContext **ctx,
                          FFVkQueueFamilyCtx *qf)
{
    VkResult ret;
    FFVkExecContext *e;
    FFVulkanFunctions *vk = &s->vkfn;

    VkCommandPoolCreateInfo cqueue_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,
        .flags              = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT,
        .queueFamilyIndex   = qf->queue_family,
    };
    VkCommandBufferAllocateInfo cbuf_create = {
        .sType              = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,
        .level              = VK_COMMAND_BUFFER_LEVEL_PRIMARY,
        .commandBufferCount = qf->nb_queues,
    };

    e = create_exec_ctx(s);
    if (!e)
        return AVERROR(ENOMEM);

    e->qf = qf;

    /* One FFVkQueueCtx and one command buffer per context queue */
    e->queues = av_mallocz(qf->nb_queues * sizeof(*e->queues));
    if (!e->queues)
        return AVERROR(ENOMEM);

    e->bufs = av_mallocz(qf->nb_queues * sizeof(*e->bufs));
    if (!e->bufs)
        return AVERROR(ENOMEM);

    /* Create command pool */
    ret = vk->CreateCommandPool(s->hwctx->act_dev, &cqueue_create,
                              s->hwctx->alloc, &e->pool);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command pool creation failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    cbuf_create.commandPool = e->pool;

    /* Allocate command buffer */
    ret = vk->AllocateCommandBuffers(s->hwctx->act_dev, &cbuf_create, e->bufs);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Command buffer alloc failure: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* Fetch queue handles; wrap around if there are more context queues
     * than the family actually exposes */
    for (int i = 0; i < qf->nb_queues; i++) {
        FFVkQueueCtx *q = &e->queues[i];
        vk->GetDeviceQueue(s->hwctx->act_dev, qf->queue_family,
                           i % qf->actual_queues, &q->queue);
    }

    *ctx = e;

    return 0;
}
446
447void ff_vk_discard_exec_deps(FFVkExecContext *e)
448{
449    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
450
451    for (int j = 0; j < q->nb_buf_deps; j++)
452        av_buffer_unref(&q->buf_deps[j]);
453    q->nb_buf_deps = 0;
454
455    for (int j = 0; j < q->nb_frame_deps; j++)
456        av_frame_free(&q->frame_deps[j]);
457    q->nb_frame_deps = 0;
458
459    e->sem_wait_cnt = 0;
460    e->sem_sig_cnt = 0;
461}
462
463int ff_vk_start_exec_recording(FFVulkanContext *s, FFVkExecContext *e)
464{
465    VkResult ret;
466    FFVulkanFunctions *vk = &s->vkfn;
467    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
468
469    VkCommandBufferBeginInfo cmd_start = {
470        .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,
471        .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT,
472    };
473
474    /* Create the fence and don't wait for it initially */
475    if (!q->fence) {
476        VkFenceCreateInfo fence_spawn = {
477            .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO,
478        };
479        ret = vk->CreateFence(s->hwctx->act_dev, &fence_spawn, s->hwctx->alloc,
480                              &q->fence);
481        if (ret != VK_SUCCESS) {
482            av_log(s, AV_LOG_ERROR, "Failed to queue frame fence: %s\n",
483                   ff_vk_ret2str(ret));
484            return AVERROR_EXTERNAL;
485        }
486    } else {
487        vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
488        vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
489    }
490
491    /* Discard queue dependencies */
492    ff_vk_discard_exec_deps(e);
493
494    ret = vk->BeginCommandBuffer(e->bufs[e->qf->cur_queue], &cmd_start);
495    if (ret != VK_SUCCESS) {
496        av_log(s, AV_LOG_ERROR, "Failed to start command recoding: %s\n",
497               ff_vk_ret2str(ret));
498        return AVERROR_EXTERNAL;
499    }
500
501    return 0;
502}
503
504VkCommandBuffer ff_vk_get_exec_buf(FFVkExecContext *e)
505{
506    return e->bufs[e->qf->cur_queue];
507}
508
/**
 * Adds a frame as an execution dependency: for every plane of the frame's
 * sw_format, appends the plane's timeline semaphore to both the wait list
 * (at its current value, with the given destination stage mask) and the
 * signal list (at current value + 1). A pointer back to the frame's
 * sem_value is kept so ff_vk_submit_exec_queue() can bump it after a
 * successful submit. A clone of the frame is stored on the current queue to
 * keep it alive until the dependencies are discarded.
 *
 * On any allocation failure, all accumulated dependencies are discarded and
 * AVERROR(ENOMEM) is returned.
 */
int ff_vk_add_exec_dep(FFVulkanContext *s, FFVkExecContext *e, AVFrame *frame,
                       VkPipelineStageFlagBits in_wait_dst_flag)
{
    AVFrame **dst;
    AVVkFrame *f = (AVVkFrame *)frame->data[0];
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
    AVHWFramesContext *fc = (AVHWFramesContext *)frame->hw_frames_ctx->data;
    int planes = av_pix_fmt_count_planes(fc->sw_format);

    for (int i = 0; i < planes; i++) {
        /* Grow all six parallel arrays by one entry each; sizes are tracked
         * separately per array by av_fast_realloc() */
        e->sem_wait = av_fast_realloc(e->sem_wait, &e->sem_wait_alloc,
                                      (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait));
        if (!e->sem_wait) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait_dst = av_fast_realloc(e->sem_wait_dst, &e->sem_wait_dst_alloc,
                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_dst));
        if (!e->sem_wait_dst) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_wait_val = av_fast_realloc(e->sem_wait_val, &e->sem_wait_val_alloc,
                                          (e->sem_wait_cnt + 1)*sizeof(*e->sem_wait_val));
        if (!e->sem_wait_val) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig = av_fast_realloc(e->sem_sig, &e->sem_sig_alloc,
                                     (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig));
        if (!e->sem_sig) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig_val = av_fast_realloc(e->sem_sig_val, &e->sem_sig_val_alloc,
                                         (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val));
        if (!e->sem_sig_val) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        e->sem_sig_val_dst = av_fast_realloc(e->sem_sig_val_dst, &e->sem_sig_val_dst_alloc,
                                             (e->sem_sig_cnt + 1)*sizeof(*e->sem_sig_val_dst));
        if (!e->sem_sig_val_dst) {
            ff_vk_discard_exec_deps(e);
            return AVERROR(ENOMEM);
        }

        /* Wait on the plane's semaphore at its current timeline value */
        e->sem_wait[e->sem_wait_cnt] = f->sem[i];
        e->sem_wait_dst[e->sem_wait_cnt] = in_wait_dst_flag;
        e->sem_wait_val[e->sem_wait_cnt] = f->sem_value[i];
        e->sem_wait_cnt++;

        /* Signal the same semaphore at value + 1 once execution finishes;
         * sem_sig_val_dst remembers where to store the new value */
        e->sem_sig[e->sem_sig_cnt] = f->sem[i];
        e->sem_sig_val[e->sem_sig_cnt] = f->sem_value[i] + 1;
        e->sem_sig_val_dst[e->sem_sig_cnt] = &f->sem_value[i];
        e->sem_sig_cnt++;
    }

    /* Keep a clone of the frame alive on the queue until deps are discarded */
    dst = av_fast_realloc(q->frame_deps, &q->frame_deps_alloc_size,
                          (q->nb_frame_deps + 1) * sizeof(*dst));
    if (!dst) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }

    q->frame_deps = dst;
    q->frame_deps[q->nb_frame_deps] = av_frame_clone(frame);
    if (!q->frame_deps[q->nb_frame_deps]) {
        ff_vk_discard_exec_deps(e);
        return AVERROR(ENOMEM);
    }
    q->nb_frame_deps++;

    return 0;
}
589
/**
 * Ends recording on the current queue's command buffer and submits it,
 * waiting on and signalling the timeline semaphores accumulated via
 * ff_vk_add_exec_dep(). The queue's fence is signalled on completion
 * (waited on by the next ff_vk_start_exec_recording() on this queue).
 * After a successful submit, each tracked frame's sem_value is incremented
 * to match the value the submit will signal.
 *
 * @return 0 on success, AVERROR_EXTERNAL on Vulkan failure
 */
int ff_vk_submit_exec_queue(FFVulkanContext *s, FFVkExecContext *e)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];

    /* Timeline values for the semaphore handles listed in s_info */
    VkTimelineSemaphoreSubmitInfo s_timeline_sem_info = {
        .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
        .pWaitSemaphoreValues = e->sem_wait_val,
        .pSignalSemaphoreValues = e->sem_sig_val,
        .waitSemaphoreValueCount = e->sem_wait_cnt,
        .signalSemaphoreValueCount = e->sem_sig_cnt,
    };

    VkSubmitInfo s_info = {
        .sType                = VK_STRUCTURE_TYPE_SUBMIT_INFO,
        .pNext                = &s_timeline_sem_info,

        .commandBufferCount   = 1,
        .pCommandBuffers      = &e->bufs[e->qf->cur_queue],

        .pWaitSemaphores      = e->sem_wait,
        .pWaitDstStageMask    = e->sem_wait_dst,
        .waitSemaphoreCount   = e->sem_wait_cnt,

        .pSignalSemaphores    = e->sem_sig,
        .signalSemaphoreCount = e->sem_sig_cnt,
    };

    ret = vk->EndCommandBuffer(e->bufs[e->qf->cur_queue]);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to finish command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    ret = vk->QueueSubmit(q->queue, 1, &s_info, q->fence);
    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to submit command buffer: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    /* Propagate the signalled timeline values back to the frames */
    for (int i = 0; i < e->sem_sig_cnt; i++)
        *e->sem_sig_val_dst[i] += 1;

    return 0;
}
638
639int ff_vk_add_dep_exec_ctx(FFVulkanContext *s, FFVkExecContext *e,
640                           AVBufferRef **deps, int nb_deps)
641{
642    AVBufferRef **dst;
643    FFVkQueueCtx *q = &e->queues[e->qf->cur_queue];
644
645    if (!deps || !nb_deps)
646        return 0;
647
648    dst = av_fast_realloc(q->buf_deps, &q->buf_deps_alloc_size,
649                          (q->nb_buf_deps + nb_deps) * sizeof(*dst));
650    if (!dst)
651        goto err;
652
653    q->buf_deps = dst;
654
655    for (int i = 0; i < nb_deps; i++) {
656        q->buf_deps[q->nb_buf_deps] = deps[i];
657        if (!q->buf_deps[q->nb_buf_deps])
658            goto err;
659        q->nb_buf_deps++;
660    }
661
662    return 0;
663
664err:
665    ff_vk_discard_exec_deps(e);
666    return AVERROR(ENOMEM);
667}
668
669FN_CREATING(FFVulkanContext, FFVkSampler, sampler, samplers, samplers_num)
670FFVkSampler *ff_vk_init_sampler(FFVulkanContext *s,
671                                int unnorm_coords, VkFilter filt)
672{
673    VkResult ret;
674    FFVulkanFunctions *vk = &s->vkfn;
675
676    VkSamplerCreateInfo sampler_info = {
677        .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
678        .magFilter = filt,
679        .minFilter = sampler_info.magFilter,
680        .mipmapMode = unnorm_coords ? VK_SAMPLER_MIPMAP_MODE_NEAREST :
681                                      VK_SAMPLER_MIPMAP_MODE_LINEAR,
682        .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
683        .addressModeV = sampler_info.addressModeU,
684        .addressModeW = sampler_info.addressModeU,
685        .anisotropyEnable = VK_FALSE,
686        .compareOp = VK_COMPARE_OP_NEVER,
687        .borderColor = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
688        .unnormalizedCoordinates = unnorm_coords,
689    };
690
691    FFVkSampler *sctx = create_sampler(s);
692    if (!sctx)
693        return NULL;
694
695    ret = vk->CreateSampler(s->hwctx->act_dev, &sampler_info,
696                            s->hwctx->alloc, &sctx->sampler[0]);
697    if (ret != VK_SUCCESS) {
698        av_log(s, AV_LOG_ERROR, "Unable to init sampler: %s\n",
699               ff_vk_ret2str(ret));
700        return NULL;
701    }
702
703    for (int i = 1; i < 4; i++)
704        sctx->sampler[i] = sctx->sampler[0];
705
706    return sctx;
707}
708
709int ff_vk_mt_is_np_rgb(enum AVPixelFormat pix_fmt)
710{
711    if (pix_fmt == AV_PIX_FMT_ABGR   || pix_fmt == AV_PIX_FMT_BGRA   ||
712        pix_fmt == AV_PIX_FMT_RGBA   || pix_fmt == AV_PIX_FMT_RGB24  ||
713        pix_fmt == AV_PIX_FMT_BGR24  || pix_fmt == AV_PIX_FMT_RGB48  ||
714        pix_fmt == AV_PIX_FMT_RGBA64 || pix_fmt == AV_PIX_FMT_RGB565 ||
715        pix_fmt == AV_PIX_FMT_BGR565 || pix_fmt == AV_PIX_FMT_BGR0   ||
716        pix_fmt == AV_PIX_FMT_0BGR   || pix_fmt == AV_PIX_FMT_RGB0)
717        return 1;
718    return 0;
719}
720
721const char *ff_vk_shader_rep_fmt(enum AVPixelFormat pixfmt)
722{
723    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pixfmt);
724    const int high = desc->comp[0].depth > 8;
725    return high ? "rgba16f" : "rgba8";
726}
727
/* Wrapper around a VkImageView so it can be carried by an AVBufferRef and
 * destroyed through destroy_imageview() once no longer referenced. */
typedef struct ImageViewCtx {
    VkImageView view;
} ImageViewCtx;
731
732static void destroy_imageview(void *opaque, uint8_t *data)
733{
734    FFVulkanContext *s = opaque;
735    FFVulkanFunctions *vk = &s->vkfn;
736    ImageViewCtx *iv = (ImageViewCtx *)data;
737
738    vk->DestroyImageView(s->hwctx->act_dev, iv->view, s->hwctx->alloc);
739    av_free(iv);
740}
741
742int ff_vk_create_imageview(FFVulkanContext *s, FFVkExecContext *e,
743                           VkImageView *v, VkImage img, VkFormat fmt,
744                           const VkComponentMapping map)
745{
746    int err;
747    AVBufferRef *buf;
748    FFVulkanFunctions *vk = &s->vkfn;
749
750    VkImageViewCreateInfo imgview_spawn = {
751        .sType      = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
752        .pNext      = NULL,
753        .image      = img,
754        .viewType   = VK_IMAGE_VIEW_TYPE_2D,
755        .format     = fmt,
756        .components = map,
757        .subresourceRange = {
758            .aspectMask     = VK_IMAGE_ASPECT_COLOR_BIT,
759            .baseMipLevel   = 0,
760            .levelCount     = 1,
761            .baseArrayLayer = 0,
762            .layerCount     = 1,
763        },
764    };
765
766    ImageViewCtx *iv = av_mallocz(sizeof(*iv));
767
768    VkResult ret = vk->CreateImageView(s->hwctx->act_dev, &imgview_spawn,
769                                       s->hwctx->alloc, &iv->view);
770    if (ret != VK_SUCCESS) {
771        av_log(s, AV_LOG_ERROR, "Failed to create imageview: %s\n",
772               ff_vk_ret2str(ret));
773        return AVERROR_EXTERNAL;
774    }
775
776    buf = av_buffer_create((uint8_t *)iv, sizeof(*iv), destroy_imageview, s, 0);
777    if (!buf) {
778        destroy_imageview(s, (uint8_t *)iv);
779        return AVERROR(ENOMEM);
780    }
781
782    /* Add to queue dependencies */
783    err = ff_vk_add_dep_exec_ctx(s, e, &buf, 1);
784    if (err) {
785        av_buffer_unref(&buf);
786        return err;
787    }
788
789    *v = iv->view;
790
791    return 0;
792}
793
/* Generates create_shader(): appends a new FFVkSPIRVShader to pl->shaders
 * and returns it (NULL on allocation failure). */
FN_CREATING(FFVulkanPipeline, FFVkSPIRVShader, shader, shaders, shaders_num)
/**
 * Creates a shader attached to the pipeline and starts its GLSL source
 * buffer with the #version pragma and a shared IS_WITHIN() bounds-check
 * helper macro.
 *
 * @param name  shader name, used for logging; stored by pointer, so it must
 *              outlive the shader
 * @param stage pipeline stage(s) this shader runs in
 * @return the new shader, or NULL on allocation failure
 */
FFVkSPIRVShader *ff_vk_init_shader(FFVulkanPipeline *pl, const char *name,
                                   VkShaderStageFlags stage)
{
    FFVkSPIRVShader *shd = create_shader(pl);
    if (!shd)
        return NULL;

    av_bprint_init(&shd->src, 0, AV_BPRINT_SIZE_UNLIMITED);

    shd->shader.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
    shd->shader.stage = stage;

    shd->name = name;

    GLSLF(0, #version %i                                                  ,460);
    GLSLC(0, #define IS_WITHIN(v1, v2) ((v1.x < v2.x) && (v1.y < v2.y))       );
    GLSLC(0,                                                                  );

    return shd;
}
815
816void ff_vk_set_compute_shader_sizes(FFVkSPIRVShader *shd, int local_size[3])
817{
818    shd->local_size[0] = local_size[0];
819    shd->local_size[1] = local_size[1];
820    shd->local_size[2] = local_size[2];
821
822    av_bprintf(&shd->src, "layout (local_size_x = %i, "
823               "local_size_y = %i, local_size_z = %i) in;\n\n",
824               shd->local_size[0], shd->local_size[1], shd->local_size[2]);
825}
826
827void ff_vk_print_shader(void *ctx, FFVkSPIRVShader *shd, int prio)
828{
829    int line = 0;
830    const char *p = shd->src.str;
831    const char *start = p;
832    const size_t len = strlen(p);
833
834    AVBPrint buf;
835    av_bprint_init(&buf, 0, AV_BPRINT_SIZE_UNLIMITED);
836
837    for (int i = 0; i < len; i++) {
838        if (p[i] == '\n') {
839            av_bprintf(&buf, "%i\t", ++line);
840            av_bprint_append_data(&buf, start, &p[i] - start + 1);
841            start = &p[i + 1];
842        }
843    }
844
845    av_log(ctx, prio, "Shader %s: \n%s", shd->name, buf.str);
846    av_bprint_finalize(&buf, NULL);
847}
848
/**
 * Compiles the shader's accumulated GLSL source to SPIR-V and creates a
 * VkShaderModule from it. The SPIR-V compiler backend (glslang or shaderc,
 * depending on build configuration) is lazily initialized on first use and
 * cached in s->spirv_compiler.
 *
 * @param entrypoint shader entry point name; stored by pointer in
 *                   shd->shader.pName, so it must outlive the shader
 * @return 0 on success, AVERROR(ENOSYS) if no compiler backend was built in,
 *         or another negative AVERROR on failure
 */
int ff_vk_compile_shader(FFVulkanContext *s, FFVkSPIRVShader *shd,
                         const char *entrypoint)
{
    int err;
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;
    VkShaderModuleCreateInfo shader_create;
    uint8_t *spirv;
    size_t spirv_size;
    void *priv;

    shd->shader.pName = entrypoint;

    /* Lazily create the compiler context on first compile */
    if (!s->spirv_compiler) {
#if CONFIG_LIBGLSLANG
        s->spirv_compiler = ff_vk_glslang_init();
#elif CONFIG_LIBSHADERC
        s->spirv_compiler = ff_vk_shaderc_init();
#else
        return AVERROR(ENOSYS);
#endif
        if (!s->spirv_compiler)
            return AVERROR(ENOMEM);
    }

    err = s->spirv_compiler->compile_shader(s->spirv_compiler, s, shd, &spirv,
                                            &spirv_size, entrypoint, &priv);
    if (err < 0)
        return err;

    av_log(s, AV_LOG_VERBOSE, "Shader %s compiled! Size: %zu bytes\n",
           shd->name, spirv_size);

    shader_create.sType    = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
    shader_create.pNext    = NULL;
    shader_create.codeSize = spirv_size;
    shader_create.flags    = 0;
    shader_create.pCode    = (void *)spirv;

    ret = vk->CreateShaderModule(s->hwctx->act_dev, &shader_create, NULL,
                                 &shd->shader.module);

    /* The SPIR-V blob is owned by the compiler backend; release it whether
     * module creation succeeded or not */
    s->spirv_compiler->free_shader(s->spirv_compiler, &priv);

    if (ret != VK_SUCCESS) {
        av_log(s, AV_LOG_ERROR, "Unable to create shader module: %s\n",
               ff_vk_ret2str(ret));
        return AVERROR_EXTERNAL;
    }

    return 0;
}
901
/* Per-VkDescriptorType properties, indexed directly by the descriptor type
 * value. Used both when sizing the opaque updater structs for descriptor
 * update templates and when emitting the matching GLSL declarations.
 * A NULL .type means no GLSL keyword is printed for the type itself
 * (uniform buffers get their keyword from the layout qualifiers instead). */
static const struct descriptor_props {
    size_t struct_size; /* Size of the opaque which updates the descriptor */
    const char *type;
    int is_uniform;
    int mem_quali;      /* Can use a memory qualifier */
    int dim_needed;     /* Must indicate dimension */
    int buf_content;    /* Must indicate buffer contents */
} descriptor_props[] = {
    [VK_DESCRIPTOR_TYPE_SAMPLER]                = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "texture",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_IMAGE]          = { sizeof(VkDescriptorImageInfo),  "image",         1, 1, 1, 0, },
    [VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT]       = { sizeof(VkDescriptorImageInfo),  "subpassInput",  1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER] = { sizeof(VkDescriptorImageInfo),  "sampler",       1, 0, 1, 0, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER]         = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER]         = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo),  NULL,           1, 0, 0, 1, },
    [VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC] = { sizeof(VkDescriptorBufferInfo), "buffer",        0, 1, 0, 1, },
    [VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "samplerBuffer", 1, 0, 0, 0, },
    [VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER]   = { sizeof(VkBufferView),           "imageBuffer",   1, 0, 0, 0, },
};
922
/**
 * Adds a descriptor set (num bindings described by desc) to a pipeline.
 * Creates one VkDescriptorSetLayout per queue, accumulates descriptor pool
 * size requirements, prepares one update-template create-info per queue,
 * and appends the corresponding GLSL declarations to the shader source.
 * With only_print_to_shader set, only the GLSL emission is performed.
 * Returns 0 on success, a negative error code on failure.
 */
int ff_vk_add_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
                             FFVkSPIRVShader *shd, FFVulkanDescriptorSetBinding *desc,
                             int num, int only_print_to_shader)
{
    VkResult ret;
    VkDescriptorSetLayout *layout;
    FFVulkanFunctions *vk = &s->vkfn;

    if (only_print_to_shader)
        goto print;

    /* Each logical set is replicated once per queue, hence nb_queues new
     * layout slots per call. */
    pl->desc_layout = av_realloc_array(pl->desc_layout, sizeof(*pl->desc_layout),
                                       pl->desc_layout_num + pl->qf->nb_queues);
    if (!pl->desc_layout)
        return AVERROR(ENOMEM);

    pl->desc_set_initialized = av_realloc_array(pl->desc_set_initialized,
                                                sizeof(*pl->desc_set_initialized),
                                                pl->descriptor_sets_num + 1);
    if (!pl->desc_set_initialized)
        return AVERROR(ENOMEM);

    pl->desc_set_initialized[pl->descriptor_sets_num] = 0;
    /* First of the nb_queues layout slots added above */
    layout = &pl->desc_layout[pl->desc_layout_num];

    { /* Create descriptor set layout descriptions */
        VkDescriptorSetLayoutCreateInfo desc_create_layout = { 0 };
        VkDescriptorSetLayoutBinding *desc_binding;

        desc_binding = av_mallocz(sizeof(*desc_binding)*num);
        if (!desc_binding)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            desc_binding[i].binding            = i;
            desc_binding[i].descriptorType     = desc[i].type;
            /* elems == 0 means a single (non-array) descriptor */
            desc_binding[i].descriptorCount    = FFMAX(desc[i].elems, 1);
            desc_binding[i].stageFlags         = desc[i].stages;
            desc_binding[i].pImmutableSamplers = desc[i].sampler ?
                                                 desc[i].sampler->sampler :
                                                 NULL;
        }

        desc_create_layout.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
        desc_create_layout.pBindings = desc_binding;
        desc_create_layout.bindingCount = num;

        /* One identical layout per queue */
        for (int i = 0; i < pl->qf->nb_queues; i++) {
            ret = vk->CreateDescriptorSetLayout(s->hwctx->act_dev, &desc_create_layout,
                                                s->hwctx->alloc, &layout[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                       "layout: %s\n", ff_vk_ret2str(ret));
                av_free(desc_binding);
                return AVERROR_EXTERNAL;
            }
        }

        av_free(desc_binding);
    }

    { /* Pool each descriptor by type and update pool counts */
        for (int i = 0; i < num; i++) {
            int j;
            /* Find an existing pool-size entry for this type */
            for (j = 0; j < pl->pool_size_desc_num; j++)
                if (pl->pool_size_desc[j].type == desc[i].type)
                    break;
            if (j >= pl->pool_size_desc_num) {
                /* New type: grow the array and zero the fresh slot */
                pl->pool_size_desc = av_realloc_array(pl->pool_size_desc,
                                                      sizeof(*pl->pool_size_desc),
                                                      ++pl->pool_size_desc_num);
                if (!pl->pool_size_desc)
                    return AVERROR(ENOMEM);
                memset(&pl->pool_size_desc[j], 0, sizeof(VkDescriptorPoolSize));
            }
            pl->pool_size_desc[j].type             = desc[i].type;
            /* Per-queue replication multiplies the required count */
            pl->pool_size_desc[j].descriptorCount += FFMAX(desc[i].elems, 1)*pl->qf->nb_queues;
        }
    }

    { /* Create template creation struct */
        VkDescriptorUpdateTemplateCreateInfo *dt;
        VkDescriptorUpdateTemplateEntry *des_entries;

        /* Freed after descriptor set initialization */
        des_entries = av_mallocz(num*sizeof(VkDescriptorUpdateTemplateEntry));
        if (!des_entries)
            return AVERROR(ENOMEM);

        for (int i = 0; i < num; i++) {
            des_entries[i].dstBinding      = i;
            des_entries[i].descriptorType  = desc[i].type;
            des_entries[i].descriptorCount = FFMAX(desc[i].elems, 1);
            des_entries[i].dstArrayElement = 0;
            /* Offset of the updater struct relative to the FFVulkanContext;
             * the context pointer is the data base at template update time */
            des_entries[i].offset          = ((uint8_t *)desc[i].updater) - (uint8_t *)s;
            des_entries[i].stride          = descriptor_props[desc[i].type].struct_size;
        }

        pl->desc_template_info = av_realloc_array(pl->desc_template_info,
                                                  sizeof(*pl->desc_template_info),
                                                  pl->total_descriptor_sets + pl->qf->nb_queues);
        if (!pl->desc_template_info)
            return AVERROR(ENOMEM);

        dt = &pl->desc_template_info[pl->total_descriptor_sets];
        memset(dt, 0, sizeof(*dt)*pl->qf->nb_queues);

        /* All per-queue templates share the same (heap) entry array; it is
         * freed once per group in ff_vk_init_pipeline_layout/free_pipeline */
        for (int i = 0; i < pl->qf->nb_queues; i++) {
            dt[i].sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO;
            dt[i].templateType = VK_DESCRIPTOR_UPDATE_TEMPLATE_TYPE_DESCRIPTOR_SET;
            dt[i].descriptorSetLayout = layout[i];
            dt[i].pDescriptorUpdateEntries = des_entries;
            dt[i].descriptorUpdateEntryCount = num;
        }
    }

    pl->descriptor_sets_num++;

    pl->desc_layout_num += pl->qf->nb_queues;
    pl->total_descriptor_sets += pl->qf->nb_queues;

print:
    /* Write shader info */
    for (int i = 0; i < num; i++) {
        const struct descriptor_props *prop = &descriptor_props[desc[i].type];
        GLSLA("layout (set = %i, binding = %i", pl->descriptor_sets_num - 1, i);

        if (desc[i].mem_layout)
            GLSLA(", %s", desc[i].mem_layout);
        GLSLA(")");

        if (prop->is_uniform)
            GLSLA(" uniform");

        if (prop->mem_quali && desc[i].mem_quali)
            GLSLA(" %s", desc[i].mem_quali);

        if (prop->type)
            GLSLA(" %s", prop->type);

        if (prop->dim_needed)
            GLSLA("%iD", desc[i].dimensions);

        GLSLA(" %s", desc[i].name);

        if (prop->buf_content)
            GLSLA(" {\n    %s\n}", desc[i].buf_content);
        else if (desc[i].elems > 0)
            GLSLA("[%i]", desc[i].elems);

        GLSLA(";\n");
    }
    GLSLA("\n");

    return 0;
}
1079
1080void ff_vk_update_descriptor_set(FFVulkanContext *s, FFVulkanPipeline *pl,
1081                                 int set_id)
1082{
1083    FFVulkanFunctions *vk = &s->vkfn;
1084
1085    /* If a set has never been updated, update all queues' sets. */
1086    if (!pl->desc_set_initialized[set_id]) {
1087        for (int i = 0; i < pl->qf->nb_queues; i++) {
1088            int idx = set_id*pl->qf->nb_queues + i;
1089            vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
1090                                                pl->desc_set[idx],
1091                                                pl->desc_template[idx],
1092                                                s);
1093        }
1094        pl->desc_set_initialized[set_id] = 1;
1095        return;
1096    }
1097
1098    set_id = set_id*pl->qf->nb_queues + pl->qf->cur_queue;
1099
1100    vk->UpdateDescriptorSetWithTemplate(s->hwctx->act_dev,
1101                                        pl->desc_set[set_id],
1102                                        pl->desc_template[set_id],
1103                                        s);
1104}
1105
1106void ff_vk_update_push_exec(FFVulkanContext *s, FFVkExecContext *e,
1107                            VkShaderStageFlagBits stage, int offset,
1108                            size_t size, void *src)
1109{
1110    FFVulkanFunctions *vk = &s->vkfn;
1111
1112    vk->CmdPushConstants(e->bufs[e->qf->cur_queue], e->bound_pl->pipeline_layout,
1113                         stage, offset, size, src);
1114}
1115
/**
 * Finalizes a pipeline's layout after all descriptor sets and push
 * constants have been added: creates the descriptor pool, allocates all
 * per-queue descriptor sets, creates the VkPipelineLayout and the
 * descriptor update templates. Consumes (frees) pl->pool_size_desc,
 * pl->push_consts and pl->desc_template_info on the success path.
 * Returns 0 on success, a negative error code on failure.
 */
int ff_vk_init_pipeline_layout(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    VkResult ret;
    FFVulkanFunctions *vk = &s->vkfn;

    /* Scratch array used to select one set per logical slot at bind time */
    pl->desc_staging = av_malloc(pl->descriptor_sets_num*sizeof(*pl->desc_staging));
    if (!pl->desc_staging)
        return AVERROR(ENOMEM);

    { /* Init descriptor set pool */
        VkDescriptorPoolCreateInfo pool_create_info = {
            .sType         = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
            .poolSizeCount = pl->pool_size_desc_num,
            .pPoolSizes    = pl->pool_size_desc,
            .maxSets       = pl->total_descriptor_sets,
        };

        ret = vk->CreateDescriptorPool(s->hwctx->act_dev, &pool_create_info,
                                       s->hwctx->alloc, &pl->desc_pool);
        /* Pool sizes are no longer needed, whether creation succeeded or not */
        av_freep(&pl->pool_size_desc);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init descriptor set "
                   "pool: %s\n", ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Allocate descriptor sets */
        VkDescriptorSetAllocateInfo alloc_info = {
            .sType              = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
            .descriptorPool     = pl->desc_pool,
            .descriptorSetCount = pl->total_descriptor_sets,
            .pSetLayouts        = pl->desc_layout,
        };

        pl->desc_set = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_set));
        if (!pl->desc_set)
            return AVERROR(ENOMEM);

        ret = vk->AllocateDescriptorSets(s->hwctx->act_dev, &alloc_info,
                                         pl->desc_set);
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to allocate descriptor set: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Finally create the pipeline layout */
        VkPipelineLayoutCreateInfo spawn_pipeline_layout = {
            .sType                  = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
            .pSetLayouts            = (VkDescriptorSetLayout *)pl->desc_staging,
            .pushConstantRangeCount = pl->push_consts_num,
            .pPushConstantRanges    = pl->push_consts,
        };

        /* Per-queue layouts are identical, so the pipeline layout only
         * needs the first layout of each group of nb_queues */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues)
            pl->desc_staging[spawn_pipeline_layout.setLayoutCount++] = pl->desc_layout[i];

        ret = vk->CreatePipelineLayout(s->hwctx->act_dev, &spawn_pipeline_layout,
                                       s->hwctx->alloc, &pl->pipeline_layout);
        /* Push-constant ranges are consumed here regardless of outcome */
        av_freep(&pl->push_consts);
        pl->push_consts_num = 0;
        if (ret != VK_SUCCESS) {
            av_log(s, AV_LOG_ERROR, "Unable to init pipeline layout: %s\n",
                   ff_vk_ret2str(ret));
            return AVERROR_EXTERNAL;
        }
    }

    { /* Descriptor template (for tightly packed descriptors) */
        VkDescriptorUpdateTemplateCreateInfo *dt;

        pl->desc_template = av_malloc(pl->total_descriptor_sets*sizeof(*pl->desc_template));
        if (!pl->desc_template)
            return AVERROR(ENOMEM);

        /* Create update templates for the descriptor sets */
        for (int i = 0; i < pl->total_descriptor_sets; i++) {
            dt = &pl->desc_template_info[i];
            dt->pipelineLayout = pl->pipeline_layout;
            ret = vk->CreateDescriptorUpdateTemplate(s->hwctx->act_dev,
                                                     dt, s->hwctx->alloc,
                                                     &pl->desc_template[i]);
            if (ret != VK_SUCCESS) {
                av_log(s, AV_LOG_ERROR, "Unable to init descriptor "
                       "template: %s\n", ff_vk_ret2str(ret));
                return AVERROR_EXTERNAL;
            }
        }

        /* Free the duplicated memory used for the template entries */
        /* (each group of nb_queues templates shares one entry array,
         * hence the stride) */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
            dt = &pl->desc_template_info[i];
            av_free((void *)dt->pDescriptorUpdateEntries);
        }

        av_freep(&pl->desc_template_info);
    }

    return 0;
}
1218
/* Instantiates create_pipeline(): allocates a zeroed FFVulkanPipeline and
 * appends its address to s->pipelines (see FN_CREATING at the top of the
 * file), so it can be freed during ff_vk_uninit(). */
FN_CREATING(FFVulkanContext, FFVulkanPipeline, pipeline, pipelines, pipelines_num)
/* Creates a new pipeline tracked by the context and ties it to a queue
 * family context. Returns NULL on allocation failure. */
FFVulkanPipeline *ff_vk_create_pipeline(FFVulkanContext *s, FFVkQueueFamilyCtx *qf)
{
    FFVulkanPipeline *pl = create_pipeline(s);
    if (pl)
        pl->qf = qf;

    return pl;
}
1228
1229int ff_vk_init_compute_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
1230{
1231    int i;
1232    VkResult ret;
1233    FFVulkanFunctions *vk = &s->vkfn;
1234
1235    VkComputePipelineCreateInfo pipe = {
1236        .sType  = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1237        .layout = pl->pipeline_layout,
1238    };
1239
1240    for (i = 0; i < pl->shaders_num; i++) {
1241        if (pl->shaders[i]->shader.stage & VK_SHADER_STAGE_COMPUTE_BIT) {
1242            pipe.stage = pl->shaders[i]->shader;
1243            break;
1244        }
1245    }
1246    if (i == pl->shaders_num) {
1247        av_log(s, AV_LOG_ERROR, "Can't init compute pipeline, no shader\n");
1248        return AVERROR(EINVAL);
1249    }
1250
1251    ret = vk->CreateComputePipelines(s->hwctx->act_dev, VK_NULL_HANDLE, 1, &pipe,
1252                                     s->hwctx->alloc, &pl->pipeline);
1253    if (ret != VK_SUCCESS) {
1254        av_log(s, AV_LOG_ERROR, "Unable to init compute pipeline: %s\n",
1255               ff_vk_ret2str(ret));
1256        return AVERROR_EXTERNAL;
1257    }
1258
1259    pl->bind_point = VK_PIPELINE_BIND_POINT_COMPUTE;
1260
1261    return 0;
1262}
1263
1264void ff_vk_bind_pipeline_exec(FFVulkanContext *s, FFVkExecContext *e,
1265                              FFVulkanPipeline *pl)
1266{
1267    FFVulkanFunctions *vk = &s->vkfn;
1268
1269    vk->CmdBindPipeline(e->bufs[e->qf->cur_queue], pl->bind_point, pl->pipeline);
1270
1271    for (int i = 0; i < pl->descriptor_sets_num; i++)
1272        pl->desc_staging[i] = pl->desc_set[i*pl->qf->nb_queues + pl->qf->cur_queue];
1273
1274    vk->CmdBindDescriptorSets(e->bufs[e->qf->cur_queue], pl->bind_point,
1275                              pl->pipeline_layout, 0,
1276                              pl->descriptor_sets_num,
1277                              (VkDescriptorSet *)pl->desc_staging,
1278                              0, NULL);
1279
1280    e->bound_pl = pl;
1281}
1282
1283static void free_exec_ctx(FFVulkanContext *s, FFVkExecContext *e)
1284{
1285    FFVulkanFunctions *vk = &s->vkfn;
1286
1287    /* Make sure all queues have finished executing */
1288    for (int i = 0; i < e->qf->nb_queues; i++) {
1289        FFVkQueueCtx *q = &e->queues[i];
1290
1291        if (q->fence) {
1292            vk->WaitForFences(s->hwctx->act_dev, 1, &q->fence, VK_TRUE, UINT64_MAX);
1293            vk->ResetFences(s->hwctx->act_dev, 1, &q->fence);
1294        }
1295
1296        /* Free the fence */
1297        if (q->fence)
1298            vk->DestroyFence(s->hwctx->act_dev, q->fence, s->hwctx->alloc);
1299
1300        /* Free buffer dependencies */
1301        for (int j = 0; j < q->nb_buf_deps; j++)
1302            av_buffer_unref(&q->buf_deps[j]);
1303        av_free(q->buf_deps);
1304
1305        /* Free frame dependencies */
1306        for (int j = 0; j < q->nb_frame_deps; j++)
1307            av_frame_free(&q->frame_deps[j]);
1308        av_free(q->frame_deps);
1309    }
1310
1311    if (e->bufs)
1312        vk->FreeCommandBuffers(s->hwctx->act_dev, e->pool, e->qf->nb_queues, e->bufs);
1313    if (e->pool)
1314        vk->DestroyCommandPool(s->hwctx->act_dev, e->pool, s->hwctx->alloc);
1315
1316    av_freep(&e->bufs);
1317    av_freep(&e->queues);
1318    av_freep(&e->sem_sig);
1319    av_freep(&e->sem_sig_val);
1320    av_freep(&e->sem_sig_val_dst);
1321    av_freep(&e->sem_wait);
1322    av_freep(&e->sem_wait_dst);
1323    av_freep(&e->sem_wait_val);
1324    av_free(e);
1325}
1326
/* Destroys all Vulkan objects owned by a pipeline and frees the pipeline
 * itself. Safe on partially-initialized pipelines: every handle/array is
 * checked before destruction. */
static void free_pipeline(FFVulkanContext *s, FFVulkanPipeline *pl)
{
    FFVulkanFunctions *vk = &s->vkfn;

    /* Shaders are owned by the pipeline (see ff_vk_init_shader callers) */
    for (int i = 0; i < pl->shaders_num; i++) {
        FFVkSPIRVShader *shd = pl->shaders[i];
        av_bprint_finalize(&shd->src, NULL);
        vk->DestroyShaderModule(s->hwctx->act_dev, shd->shader.module,
                                s->hwctx->alloc);
        av_free(shd);
    }

    vk->DestroyPipeline(s->hwctx->act_dev, pl->pipeline, s->hwctx->alloc);
    vk->DestroyPipelineLayout(s->hwctx->act_dev, pl->pipeline_layout,
                              s->hwctx->alloc);

    /* One template and one layout exist per queue per logical set */
    for (int i = 0; i < pl->desc_layout_num; i++) {
        if (pl->desc_template && pl->desc_template[i])
            vk->DestroyDescriptorUpdateTemplate(s->hwctx->act_dev, pl->desc_template[i],
                                                s->hwctx->alloc);
        if (pl->desc_layout && pl->desc_layout[i])
            vk->DestroyDescriptorSetLayout(s->hwctx->act_dev, pl->desc_layout[i],
                                           s->hwctx->alloc);
    }

    /* Also frees the descriptor sets */
    if (pl->desc_pool)
        vk->DestroyDescriptorPool(s->hwctx->act_dev, pl->desc_pool,
                                  s->hwctx->alloc);

    av_freep(&pl->desc_staging);
    av_freep(&pl->desc_set);
    av_freep(&pl->shaders);
    av_freep(&pl->desc_layout);
    av_freep(&pl->desc_template);
    av_freep(&pl->desc_set_initialized);
    av_freep(&pl->push_consts);
    pl->push_consts_num = 0;

    /* Only freed in case of failure */
    /* (on success, ff_vk_init_pipeline_layout already released these) */
    av_freep(&pl->pool_size_desc);
    if (pl->desc_template_info) {
        /* Entry arrays are shared per group of nb_queues templates */
        for (int i = 0; i < pl->total_descriptor_sets; i += pl->qf->nb_queues) {
            VkDescriptorUpdateTemplateCreateInfo *dt = &pl->desc_template_info[i];
            av_free((void *)dt->pDescriptorUpdateEntries);
        }
        av_freep(&pl->desc_template_info);
    }

    av_free(pl);
}
1378
1379void ff_vk_uninit(FFVulkanContext *s)
1380{
1381    FFVulkanFunctions *vk = &s->vkfn;
1382
1383    if (s->spirv_compiler)
1384        s->spirv_compiler->uninit(&s->spirv_compiler);
1385
1386    for (int i = 0; i < s->exec_ctx_num; i++)
1387        free_exec_ctx(s, s->exec_ctx[i]);
1388    av_freep(&s->exec_ctx);
1389
1390    for (int i = 0; i < s->samplers_num; i++) {
1391        vk->DestroySampler(s->hwctx->act_dev, s->samplers[i]->sampler[0],
1392                           s->hwctx->alloc);
1393        av_free(s->samplers[i]);
1394    }
1395    av_freep(&s->samplers);
1396
1397    for (int i = 0; i < s->pipelines_num; i++)
1398        free_pipeline(s, s->pipelines[i]);
1399    av_freep(&s->pipelines);
1400
1401    av_freep(&s->scratch);
1402    s->scratch_size = 0;
1403
1404    av_buffer_unref(&s->device_ref);
1405    av_buffer_unref(&s->frames_ref);
1406}
1407