/* * Copyright © 2021 Collabora Ltd. * * Derived from tu_cmd_buffer.c which is: * Copyright © 2016 Red Hat. * Copyright © 2016 Bas Nieuwenhuizen * Copyright © 2015 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ #include "genxml/gen_macros.h" #include "panvk_cs.h" #include "panvk_private.h" #include "pan_blitter.h" #include "pan_cs.h" #include "pan_encoder.h" #include "util/rounding.h" #include "util/u_pack_color.h" #include "vk_format.h" static uint32_t panvk_debug_adjust_bo_flags(const struct panvk_device *device, uint32_t bo_flags) { uint32_t debug_flags = device->physical_device->instance->debug_flags; if (debug_flags & PANVK_DEBUG_DUMP) bo_flags &= ~PAN_BO_INVISIBLE; return bo_flags; } static void panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf) { const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info; struct panvk_batch *batch = cmdbuf->state.batch; struct panfrost_ptr job_ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB); GENX(pan_emit_fragment_job)(fbinfo, batch->fb.desc.gpu, job_ptr.cpu), batch->fragment_job = job_ptr.gpu; util_dynarray_append(&batch->jobs, void *, job_ptr.cpu); } void panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf) { struct panvk_batch *batch = cmdbuf->state.batch; if (!batch) return; const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info; assert(batch); bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s; for (unsigned i = 0; i < fbinfo->rt_count; i++) clear |= fbinfo->rts[i].clear; if (!clear && !batch->scoreboard.first_job) { if (util_dynarray_num_elements(&batch->event_ops, struct panvk_event_op) == 0) { /* Content-less batch, let's drop it */ vk_free(&cmdbuf->pool->vk.alloc, batch); } else { /* Batch has no jobs but is needed for synchronization, let's add a * NULL job so the SUBMIT ioctl doesn't choke on it. 
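          * The NULL job carries no actual work; it just gives the kernel a
          * valid job chain to walk, so the event operations attached to this
          * batch still get processed at submit time.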
*/ struct panfrost_ptr ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, JOB_HEADER); util_dynarray_append(&batch->jobs, void *, ptr.cpu); panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, MALI_JOB_TYPE_NULL, false, false, 0, 0, &ptr, false); list_addtail(&batch->node, &cmdbuf->batches); } cmdbuf->state.batch = NULL; return; } struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; list_addtail(&batch->node, &cmdbuf->batches); if (batch->scoreboard.first_tiler) { struct panfrost_ptr preload_jobs[2]; unsigned num_preload_jobs = GENX(pan_preload_fb)(&cmdbuf->desc_pool.base, &batch->scoreboard, &cmdbuf->state.fb.info, batch->tls.gpu, batch->tiler.descs.gpu, preload_jobs); for (unsigned i = 0; i < num_preload_jobs; i++) util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu); } if (batch->tlsinfo.tls.size) { unsigned size = panfrost_get_total_stack_size(batch->tlsinfo.tls.size, pdev->thread_tls_alloc, pdev->core_id_range); batch->tlsinfo.tls.ptr = pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, size, 4096).gpu; } if (batch->tlsinfo.wls.size) { assert(batch->wls_total_size); batch->tlsinfo.wls.ptr = pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, batch->wls_total_size, 4096).gpu; } if (batch->tls.cpu) GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu); if (batch->fb.desc.cpu) { batch->fb.desc.gpu |= GENX(pan_emit_fbd)(pdev, &cmdbuf->state.fb.info, &batch->tlsinfo, &batch->tiler.ctx, batch->fb.desc.cpu); panvk_cmd_prepare_fragment_job(cmdbuf); } cmdbuf->state.batch = NULL; } void panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo, const VkSubpassEndInfo *pSubpassEndInfo) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); panvk_per_arch(cmd_close_batch)(cmdbuf); cmdbuf->state.subpass++; panvk_cmd_fb_info_set_subpass(cmdbuf); panvk_cmd_open_batch(cmdbuf); } void panvk_per_arch(CmdNextSubpass)(VkCommandBuffer cmd, VkSubpassContents contents) { VkSubpassBeginInfo binfo = { .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO, .contents = contents }; VkSubpassEndInfo einfo = { .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, }; panvk_per_arch(CmdNextSubpass2)(cmd, &binfo, &einfo); } void panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf) { struct panvk_batch *batch = cmdbuf->state.batch; if (batch->fb.desc.gpu) return; const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info; bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s; unsigned tags = MALI_FBD_TAG_IS_MFBD; batch->fb.info = cmdbuf->state.framebuffer; batch->fb.desc = pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base, PAN_DESC(FRAMEBUFFER), PAN_DESC_ARRAY(has_zs_ext ? 1 : 0, ZS_CRC_EXTENSION), PAN_DESC_ARRAY(MAX2(fbinfo->rt_count, 1), RENDER_TARGET)); /* Tag the pointer */ batch->fb.desc.gpu |= tags; memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0, sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds)); } void panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx) { struct panvk_batch *batch = cmdbuf->state.batch; assert(batch); if (!batch->tls.gpu) { batch->tls = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE); } } static void panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf, struct panvk_cmd_bind_point_state *bind_point_state, struct panvk_draw_info *draw) { struct panvk_sysvals *sysvals = &bind_point_state->desc_state.sysvals; unsigned base_vertex = draw->index_size ? 
draw->vertex_offset : 0; if (sysvals->first_vertex != draw->offset_start || sysvals->base_vertex != base_vertex || sysvals->base_instance != draw->first_instance) { sysvals->first_vertex = draw->offset_start; sysvals->base_vertex = base_vertex; sysvals->base_instance = draw->first_instance; bind_point_state->desc_state.sysvals_ptr = 0; } if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) { memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants, sizeof(cmdbuf->state.blend.constants)); bind_point_state->desc_state.sysvals_ptr = 0; } if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) { panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport, &sysvals->viewport_scale); panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport, &sysvals->viewport_offset); bind_point_state->desc_state.sysvals_ptr = 0; } } static void panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf, struct panvk_cmd_bind_point_state *bind_point_state) { struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; if (desc_state->sysvals_ptr) return; struct panfrost_ptr sysvals = pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, sizeof(desc_state->sysvals), 16); memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals)); desc_state->sysvals_ptr = sysvals.gpu; } static void panvk_cmd_prepare_push_constants(struct panvk_cmd_buffer *cmdbuf, struct panvk_cmd_bind_point_state *bind_point_state) { struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; const struct panvk_pipeline *pipeline = bind_point_state->pipeline; if (!pipeline->layout->push_constants.size || desc_state->push_constants) return; struct panfrost_ptr push_constants = pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, ALIGN_POT(pipeline->layout->push_constants.size, 16), 16); memcpy(push_constants.cpu, cmdbuf->push_constants, pipeline->layout->push_constants.size); desc_state->push_constants = push_constants.gpu; } static void panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf, struct panvk_cmd_bind_point_state *bind_point_state) { struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; const struct panvk_pipeline *pipeline = bind_point_state->pipeline; if (!pipeline->num_ubos || desc_state->ubos) return; panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state); panvk_cmd_prepare_push_constants(cmdbuf, bind_point_state); struct panfrost_ptr ubos = pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, pipeline->num_ubos, UNIFORM_BUFFER); panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu); desc_state->ubos = ubos.gpu; } static void panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf, struct panvk_cmd_bind_point_state *bind_point_state) { struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; const struct panvk_pipeline *pipeline = bind_point_state->pipeline; unsigned num_textures = pipeline->layout->num_textures; if (!num_textures || desc_state->textures) return; struct panfrost_ptr textures = pan_pool_alloc_aligned(&cmdbuf->desc_pool.base, num_textures * pan_size(TEXTURE), pan_size(TEXTURE)); void *texture = textures.cpu; for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { if (!desc_state->sets[i]) continue; memcpy(texture, desc_state->sets[i]->textures, desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE)); texture += desc_state->sets[i]->layout->num_textures * pan_size(TEXTURE); } desc_state->textures = textures.gpu; } static void panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf, struct panvk_cmd_bind_point_state 
*bind_point_state) { struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; const struct panvk_pipeline *pipeline = bind_point_state->pipeline; unsigned num_samplers = pipeline->layout->num_samplers; if (!num_samplers || desc_state->samplers) return; struct panfrost_ptr samplers = pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, num_samplers, SAMPLER); void *sampler = samplers.cpu; /* Prepare the dummy sampler */ pan_pack(sampler, SAMPLER, cfg) { cfg.seamless_cube_map = false; cfg.magnify_nearest = true; cfg.minify_nearest = true; cfg.normalized_coordinates = false; } sampler += pan_size(SAMPLER); for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) { if (!desc_state->sets[i]) continue; memcpy(sampler, desc_state->sets[i]->samplers, desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER)); sampler += desc_state->sets[i]->layout->num_samplers * pan_size(SAMPLER); } desc_state->samplers = samplers.gpu; } static void panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS); if (!pipeline->fs.dynamic_rsd) { draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT]; return; } if (!cmdbuf->state.fs_rsd) { struct panfrost_ptr rsd = pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base, PAN_DESC(RENDERER_STATE), PAN_DESC_ARRAY(pipeline->blend.state.rt_count, BLEND)); struct mali_renderer_state_packed rsd_dyn; struct mali_renderer_state_packed *rsd_templ = (struct mali_renderer_state_packed *)&pipeline->fs.rsd_template; STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >= sizeof(*rsd_templ)); panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn); pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE); memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn)); void *bd = rsd.cpu + pan_size(RENDERER_STATE); for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) { if (pipeline->blend.constant[i].index != (uint8_t)~0) { struct mali_blend_packed bd_dyn; struct mali_blend_packed *bd_templ = (struct mali_blend_packed *)&pipeline->blend.bd_template[i]; STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >= sizeof(*bd_templ)); panvk_per_arch(emit_blend_constant)(cmdbuf->device, pipeline, i, cmdbuf->state.blend.constants, &bd_dyn); pan_merge(bd_dyn, (*bd_templ), BLEND); memcpy(bd, &bd_dyn, sizeof(bd_dyn)); } bd += pan_size(BLEND); } cmdbuf->state.fs_rsd = rsd.gpu; } draw->fs_rsd = cmdbuf->state.fs_rsd; } void panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf, unsigned width, unsigned height) { struct panvk_batch *batch = cmdbuf->state.batch; if (batch->tiler.descs.cpu) return; batch->tiler.descs = pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base, PAN_DESC(TILER_CONTEXT), PAN_DESC(TILER_HEAP)); STATIC_ASSERT(sizeof(batch->tiler.templ) >= pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP)); struct panfrost_ptr desc = { .gpu = batch->tiler.descs.gpu, .cpu = batch->tiler.templ, }; panvk_per_arch(emit_tiler_context)(cmdbuf->device, width, height, &desc); memcpy(batch->tiler.descs.cpu, batch->tiler.templ, pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP)); batch->tiler.ctx.bifrost = batch->tiler.descs.gpu; } void panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf) { const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info; panvk_per_arch(cmd_get_tiler_context)(cmdbuf, fbinfo->width, fbinfo->height); } static void panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { struct panvk_batch 
*batch = cmdbuf->state.batch;

   panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
   draw->tiler_ctx = &batch->tiler.ctx;
}

static void
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;

   panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
                        draw->padded_vertex_count * draw->instance_count);

   unsigned buf_count = panvk_varyings_buf_count(varyings);
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, buf_count + 1,
                                ATTRIBUTE_BUFFER);

   panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu);

   /* We need an empty entry to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
      draw->position = varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
                       varyings->varying[VARYING_SLOT_POS].offset;
   }

   if (pipeline->ia.writes_point_size) {
      draw->psiz = varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
                   varyings->varying[VARYING_SLOT_PSIZ].offset;
   } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
      draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH ?
                         cmdbuf->state.rast.line_width :
                         pipeline->rast.line_width;
   } else {
      draw->line_width = 1.0f;
   }
   draw->varying_bufs = bufs.gpu;

   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!varyings->stage[s].count)
         continue;

      struct panfrost_ptr attribs =
         pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                   varyings->stage[s].count, ATTRIBUTE);

      panvk_per_arch(emit_varyings)(cmdbuf->device, varyings, s, attribs.cpu);
      draw->stages[s].varyings = attribs.gpu;
   }
}

/* Copy the image attribute buffer descriptors from the bound descriptor sets
 * and emit one ATTRIBUTE descriptor per image. Each image consumes two
 * attribute buffer slots, hence the "* 2" strides below.
 */
static void
panvk_fill_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state,
                          void *attrib_bufs, void *attribs,
                          unsigned first_buf)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned s = 0; s < pipeline->layout->num_sets; s++) {
      const struct panvk_descriptor_set *set = desc_state->sets[s];

      if (!set)
         continue;

      const struct panvk_descriptor_set_layout *layout = set->layout;
      unsigned img_idx = pipeline->layout->sets[s].img_offset;
      unsigned offset = img_idx * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned size = layout->num_imgs * pan_size(ATTRIBUTE_BUFFER) * 2;

      memcpy(attrib_bufs + offset, desc_state->sets[s]->img_attrib_bufs, size);

      offset = img_idx * pan_size(ATTRIBUTE);
      for (unsigned i = 0; i < layout->num_imgs; i++) {
         pan_pack(attribs + offset, ATTRIBUTE, cfg) {
            cfg.buffer_index = first_buf + (img_idx + i) * 2;
            cfg.format = desc_state->sets[s]->img_fmts[i];
         }
         offset += pan_size(ATTRIBUTE);
      }
   }
}

static void
panvk_prepare_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (desc_state->non_vs_attribs || !pipeline->img_access_mask)
      return;

   unsigned attrib_count = pipeline->layout->num_imgs;
   unsigned attrib_buf_count = (pipeline->layout->num_imgs * 2);
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                attrib_buf_count + 1, ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
                                ATTRIBUTE);

   panvk_fill_non_vs_attribs(cmdbuf, bind_point_state, bufs.cpu, attribs.cpu, 0);

   desc_state->non_vs_attrib_bufs = bufs.gpu;
   desc_state->non_vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_imgs =
      pipeline->img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX) ?
      pipeline->layout->num_imgs : 0;
   unsigned attrib_count = pipeline->attribs.attrib_count + num_imgs;

   if (desc_state->vs_attribs || !attrib_count)
      return;

   if (!pipeline->attribs.buf_count) {
      panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
      desc_state->vs_attrib_bufs = desc_state->non_vs_attrib_bufs;
      desc_state->vs_attribs = desc_state->non_vs_attribs;
      return;
   }

   unsigned attrib_buf_count = pipeline->attribs.buf_count * 2;
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                attrib_buf_count + 1, ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
                                ATTRIBUTE);

   panvk_per_arch(emit_attrib_bufs)(&pipeline->attribs,
                                    cmdbuf->state.vb.bufs,
                                    cmdbuf->state.vb.count,
                                    draw, bufs.cpu);
   panvk_per_arch(emit_attribs)(cmdbuf->device, draw, &pipeline->attribs,
                                cmdbuf->state.vb.bufs, cmdbuf->state.vb.count,
                                attribs.cpu);

   if (attrib_count > pipeline->attribs.buf_count) {
      unsigned bufs_offset =
         pipeline->attribs.buf_count * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned attribs_offset =
         pipeline->attribs.buf_count * pan_size(ATTRIBUTE);

      panvk_fill_non_vs_attribs(cmdbuf, bind_point_state,
                                bufs.cpu + bufs_offset,
                                attribs.cpu + attribs_offset,
                                pipeline->attribs.buf_count * 2);
   }

   /* A NULL entry is needed to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   desc_state->vs_attrib_bufs = bufs.gpu;
   desc_state->vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned i = 0; i < ARRAY_SIZE(draw->stages); i++) {
      if (i == MESA_SHADER_VERTEX) {
         panvk_draw_prepare_vs_attribs(cmdbuf, draw);
         draw->stages[i].attributes = desc_state->vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->vs_attrib_bufs;
      } else if (pipeline->img_access_mask & BITFIELD_BIT(i)) {
         panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
         draw->stages[i].attributes = desc_state->non_vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->non_vs_attrib_bufs;
      }
   }
}

static void
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (pipeline->vpd) {
      draw->viewport = pipeline->vpd;
   } else if (cmdbuf->state.vpd) {
      draw->viewport = cmdbuf->state.vpd;
   } else {
      struct panfrost_ptr vp =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT);
      const VkViewport *viewport =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT ?
&cmdbuf->state.viewport : &pipeline->viewport; const VkRect2D *scissor = pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR ? &cmdbuf->state.scissor : &pipeline->scissor; panvk_per_arch(emit_viewport)(viewport, scissor, vp.cpu); draw->viewport = cmdbuf->state.vpd = vp.gpu; } } static void panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS); struct panvk_batch *batch = cmdbuf->state.batch; struct panfrost_ptr ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.vertex = ptr; panvk_per_arch(emit_vertex_job)(pipeline, draw, ptr.cpu); } static void panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS); struct panvk_batch *batch = cmdbuf->state.batch; struct panfrost_ptr ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB); util_dynarray_append(&batch->jobs, void *, ptr.cpu); draw->jobs.tiler = ptr; panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu); } static void panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf, struct panvk_draw_info *draw) { struct panvk_batch *batch = cmdbuf->state.batch; struct panvk_cmd_bind_point_state *bind_point_state = panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS); const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS); /* There are only 16 bits in the descriptor for the job ID, make sure all * the 3 (2 in Bifrost) jobs in this draw are in the same batch. */ if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) { panvk_per_arch(cmd_close_batch)(cmdbuf); panvk_cmd_preload_fb_after_batch_split(cmdbuf); batch = panvk_cmd_open_batch(cmdbuf); } if (pipeline->rast.enable) panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); panvk_cmd_prepare_draw_sysvals(cmdbuf, bind_point_state, draw); panvk_cmd_prepare_ubos(cmdbuf, bind_point_state); panvk_cmd_prepare_textures(cmdbuf, bind_point_state); panvk_cmd_prepare_samplers(cmdbuf, bind_point_state); /* TODO: indexed draws */ struct panvk_descriptor_state *desc_state = panvk_cmd_get_desc_state(cmdbuf, GRAPHICS); draw->tls = batch->tls.gpu; draw->fb = batch->fb.desc.gpu; draw->ubos = desc_state->ubos; draw->textures = desc_state->textures; draw->samplers = desc_state->samplers; STATIC_ASSERT(sizeof(draw->invocation) >= sizeof(struct mali_invocation_packed)); panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw->invocation, 1, draw->vertex_range, draw->instance_count, 1, 1, 1, true, false); panvk_draw_prepare_fs_rsd(cmdbuf, draw); panvk_draw_prepare_varyings(cmdbuf, draw); panvk_draw_prepare_attributes(cmdbuf, draw); panvk_draw_prepare_viewport(cmdbuf, draw); panvk_draw_prepare_tiler_context(cmdbuf, draw); panvk_draw_prepare_vertex_job(cmdbuf, draw); panvk_draw_prepare_tiler_job(cmdbuf, draw); batch->tlsinfo.tls.size = MAX2(pipeline->tls_size, batch->tlsinfo.tls.size); assert(!pipeline->wls_size); unsigned vjob_id = panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, MALI_JOB_TYPE_VERTEX, false, false, 0, 0, &draw->jobs.vertex, false); if (pipeline->rast.enable) { panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, MALI_JOB_TYPE_TILER, false, false, vjob_id, 0, &draw->jobs.tiler, false); } /* Clear the dirty flags all at once */ desc_state->dirty = cmdbuf->state.dirty = 0; } void 
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount, uint32_t firstVertex, uint32_t firstInstance) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); if (instanceCount == 0 || vertexCount == 0) return; struct panvk_draw_info draw = { .first_vertex = firstVertex, .vertex_count = vertexCount, .vertex_range = vertexCount, .first_instance = firstInstance, .instance_count = instanceCount, .padded_vertex_count = instanceCount > 1 ? panfrost_padded_vertex_count(vertexCount) : vertexCount, .offset_start = firstVertex, }; panvk_cmd_draw(cmdbuf, &draw); } static void panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf, uint32_t start, uint32_t count, bool restart, uint32_t *min, uint32_t *max) { void *ptr = cmdbuf->state.ib.buffer->bo->ptr.cpu + cmdbuf->state.ib.buffer->bo_offset + cmdbuf->state.ib.offset; fprintf(stderr, "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n"); assert(cmdbuf->state.ib.buffer); assert(cmdbuf->state.ib.buffer->bo); assert(cmdbuf->state.ib.buffer->bo->ptr.cpu); *max = 0; /* TODO: Use panfrost_minmax_cache */ /* TODO: Read full cacheline of data to mitigate the uncached * mapping slowness. */ switch (cmdbuf->state.ib.index_size) { #define MINMAX_SEARCH_CASE(sz) \ case sz: { \ uint ## sz ## _t *indices = ptr; \ *min = UINT ## sz ## _MAX; \ for (uint32_t i = 0; i < count; i++) { \ if (restart && indices[i + start] == UINT ## sz ##_MAX) continue; \ *min = MIN2(indices[i + start], *min); \ *max = MAX2(indices[i + start], *max); \ } \ break; \ } MINMAX_SEARCH_CASE(32) MINMAX_SEARCH_CASE(16) MINMAX_SEARCH_CASE(8) #undef MINMAX_SEARCH_CASE default: unreachable("Invalid index size"); } } void panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount, uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); uint32_t min_vertex, max_vertex; if (instanceCount == 0 || indexCount == 0) return; const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS); bool primitive_restart = pipeline->ia.primitive_restart; panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart, &min_vertex, &max_vertex); unsigned vertex_range = max_vertex - min_vertex + 1; struct panvk_draw_info draw = { .index_size = cmdbuf->state.ib.index_size, .first_index = firstIndex, .index_count = indexCount, .vertex_offset = vertexOffset, .first_instance = firstInstance, .instance_count = instanceCount, .vertex_range = vertex_range, .vertex_count = indexCount + abs(vertexOffset), .padded_vertex_count = instanceCount > 1 ? panfrost_padded_vertex_count(vertex_range) : vertex_range, .offset_start = min_vertex + vertexOffset, .indices = panvk_buffer_gpu_ptr(cmdbuf->state.ib.buffer, cmdbuf->state.ib.offset) + (firstIndex * (cmdbuf->state.ib.index_size / 8)), }; panvk_cmd_draw(cmdbuf, &draw); } VkResult panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); VkResult ret = cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ? cmdbuf->vk.cmd_queue.error : cmdbuf->record_result; panvk_per_arch(cmd_close_batch)(cmdbuf); cmdbuf->status = ret == VK_SUCCESS ? 
PANVK_CMD_BUFFER_STATUS_EXECUTABLE : PANVK_CMD_BUFFER_STATUS_INVALID; return ret; } void panvk_per_arch(CmdEndRenderPass2)(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); panvk_per_arch(cmd_close_batch)(cmdbuf); vk_free(&cmdbuf->pool->vk.alloc, cmdbuf->state.clear); cmdbuf->state.batch = NULL; cmdbuf->state.pass = NULL; cmdbuf->state.subpass = NULL; cmdbuf->state.framebuffer = NULL; cmdbuf->state.clear = NULL; } void panvk_per_arch(CmdEndRenderPass)(VkCommandBuffer cmd) { VkSubpassEndInfo einfo = { .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO, }; panvk_per_arch(CmdEndRenderPass2)(cmd, &einfo); } void panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer, const VkDependencyInfo *pDependencyInfo) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); /* Caches are flushed/invalidated at batch boundaries for now, nothing to do * for memory barriers assuming we implement barriers with the creation of a * new batch. * FIXME: We can probably do better with a CacheFlush job that has the * barrier flag set to true. */ if (cmdbuf->state.batch) { panvk_per_arch(cmd_close_batch)(cmdbuf); panvk_cmd_preload_fb_after_batch_split(cmdbuf); panvk_cmd_open_batch(cmdbuf); } } static void panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf, struct panvk_event *event, enum panvk_event_op_type type) { struct panvk_event_op op = { .type = type, .event = event, }; if (cmdbuf->state.batch == NULL) { /* No open batch, let's create a new one so this operation happens in * the right order. */ panvk_cmd_open_batch(cmdbuf); util_dynarray_append(&cmdbuf->state.batch->event_ops, struct panvk_event_op, op); panvk_per_arch(cmd_close_batch)(cmdbuf); } else { /* Let's close the current batch so the operation executes before any * future commands. */ util_dynarray_append(&cmdbuf->state.batch->event_ops, struct panvk_event_op, op); panvk_per_arch(cmd_close_batch)(cmdbuf); panvk_cmd_preload_fb_after_batch_split(cmdbuf); panvk_cmd_open_batch(cmdbuf); } } static void panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf, struct panvk_event *event) { struct panvk_event_op op = { .type = PANVK_EVENT_OP_WAIT, .event = event, }; if (cmdbuf->state.batch == NULL) { /* No open batch, let's create a new one and have it wait for this event. */ panvk_cmd_open_batch(cmdbuf); util_dynarray_append(&cmdbuf->state.batch->event_ops, struct panvk_event_op, op); } else { /* Let's close the current batch so any future commands wait on the * event signal operation. 
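       * If the batch has no fragment or scoreboard job yet, we can keep it
       * open and simply attach the wait operation to it.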
*/ if (cmdbuf->state.batch->fragment_job || cmdbuf->state.batch->scoreboard.first_job) { panvk_per_arch(cmd_close_batch)(cmdbuf); panvk_cmd_preload_fb_after_batch_split(cmdbuf); panvk_cmd_open_batch(cmdbuf); } util_dynarray_append(&cmdbuf->state.batch->event_ops, struct panvk_event_op, op); } } void panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event, const VkDependencyInfo *pDependencyInfo) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); VK_FROM_HANDLE(panvk_event, event, _event); /* vkCmdSetEvent cannot be called inside a render pass */ assert(cmdbuf->state.pass == NULL); panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET); } void panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer, VkEvent _event, VkPipelineStageFlags2 stageMask) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); VK_FROM_HANDLE(panvk_event, event, _event); /* vkCmdResetEvent cannot be called inside a render pass */ assert(cmdbuf->state.pass == NULL); panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET); } void panvk_per_arch(CmdWaitEvents2)(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, const VkDependencyInfo *pDependencyInfos) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); assert(eventCount > 0); for (uint32_t i = 0; i < eventCount; i++) { VK_FROM_HANDLE(panvk_event, event, pEvents[i]); panvk_add_wait_event_operation(cmdbuf, event); } } static VkResult panvk_reset_cmdbuf(struct panvk_cmd_buffer *cmdbuf) { vk_command_buffer_reset(&cmdbuf->vk); cmdbuf->record_result = VK_SUCCESS; list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) { list_del(&batch->node); util_dynarray_fini(&batch->jobs); util_dynarray_fini(&batch->event_ops); vk_free(&cmdbuf->pool->vk.alloc, batch); } panvk_pool_reset(&cmdbuf->desc_pool); panvk_pool_reset(&cmdbuf->tls_pool); panvk_pool_reset(&cmdbuf->varying_pool); cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL; for (unsigned i = 0; i < MAX_BIND_POINTS; i++) memset(&cmdbuf->bind_points[i].desc_state.sets, 0, sizeof(cmdbuf->bind_points[0].desc_state.sets)); return cmdbuf->record_result; } static void panvk_destroy_cmdbuf(struct panvk_cmd_buffer *cmdbuf) { struct panvk_device *device = cmdbuf->device; list_del(&cmdbuf->pool_link); list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) { list_del(&batch->node); util_dynarray_fini(&batch->jobs); util_dynarray_fini(&batch->event_ops); vk_free(&cmdbuf->pool->vk.alloc, batch); } panvk_pool_cleanup(&cmdbuf->desc_pool); panvk_pool_cleanup(&cmdbuf->tls_pool); panvk_pool_cleanup(&cmdbuf->varying_pool); vk_command_buffer_finish(&cmdbuf->vk); vk_free(&device->vk.alloc, cmdbuf); } static VkResult panvk_create_cmdbuf(struct panvk_device *device, struct panvk_cmd_pool *pool, VkCommandBufferLevel level, struct panvk_cmd_buffer **cmdbuf_out) { struct panvk_cmd_buffer *cmdbuf; cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); if (!cmdbuf) return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); VkResult result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk, level); if (result != VK_SUCCESS) { vk_free(&device->vk.alloc, cmdbuf); return result; } cmdbuf->device = device; cmdbuf->pool = pool; if (pool) { list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers); cmdbuf->queue_family_index = pool->vk.queue_family_index; } else { /* Init the pool_link so we can safely call list_del when we destroy * the command buffer */ list_inithead(&cmdbuf->pool_link); 
cmdbuf->queue_family_index = PANVK_QUEUE_GENERAL; } panvk_pool_init(&cmdbuf->desc_pool, &device->physical_device->pdev, pool ? &pool->desc_bo_pool : NULL, 0, 64 * 1024, "Command buffer descriptor pool", true); panvk_pool_init(&cmdbuf->tls_pool, &device->physical_device->pdev, pool ? &pool->tls_bo_pool : NULL, panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE), 64 * 1024, "TLS pool", false); panvk_pool_init(&cmdbuf->varying_pool, &device->physical_device->pdev, pool ? &pool->varying_bo_pool : NULL, panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE), 64 * 1024, "Varyings pool", false); list_inithead(&cmdbuf->batches); cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL; *cmdbuf_out = cmdbuf; return VK_SUCCESS; } VkResult panvk_per_arch(AllocateCommandBuffers)(VkDevice _device, const VkCommandBufferAllocateInfo *pAllocateInfo, VkCommandBuffer *pCommandBuffers) { VK_FROM_HANDLE(panvk_device, device, _device); VK_FROM_HANDLE(panvk_cmd_pool, pool, pAllocateInfo->commandPool); VkResult result = VK_SUCCESS; unsigned i; for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { struct panvk_cmd_buffer *cmdbuf = NULL; if (!list_is_empty(&pool->free_cmd_buffers)) { cmdbuf = list_first_entry( &pool->free_cmd_buffers, struct panvk_cmd_buffer, pool_link); list_del(&cmdbuf->pool_link); list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers); vk_command_buffer_finish(&cmdbuf->vk); result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk, pAllocateInfo->level); } else { result = panvk_create_cmdbuf(device, pool, pAllocateInfo->level, &cmdbuf); } if (result != VK_SUCCESS) goto err_free_cmd_bufs; pCommandBuffers[i] = panvk_cmd_buffer_to_handle(cmdbuf); } return VK_SUCCESS; err_free_cmd_bufs: panvk_per_arch(FreeCommandBuffers)(_device, pAllocateInfo->commandPool, i, pCommandBuffers); for (unsigned j = 0; j < i; j++) pCommandBuffers[j] = VK_NULL_HANDLE; return result; } void panvk_per_arch(FreeCommandBuffers)(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, const VkCommandBuffer *pCommandBuffers) { for (uint32_t i = 0; i < commandBufferCount; i++) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, pCommandBuffers[i]); if (cmdbuf) { if (cmdbuf->pool) { list_del(&cmdbuf->pool_link); panvk_reset_cmdbuf(cmdbuf); list_addtail(&cmdbuf->pool_link, &cmdbuf->pool->free_cmd_buffers); } else panvk_destroy_cmdbuf(cmdbuf); } } } VkResult panvk_per_arch(ResetCommandBuffer)(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); return panvk_reset_cmdbuf(cmdbuf); } VkResult panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); VkResult result = VK_SUCCESS; if (cmdbuf->status != PANVK_CMD_BUFFER_STATUS_INITIAL) { /* If the command buffer has already been reset with * vkResetCommandBuffer, no need to do it again. 
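       * Only command buffers that have left the INITIAL state take the
       * implicit reset path here.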
*/ result = panvk_reset_cmdbuf(cmdbuf); if (result != VK_SUCCESS) return result; } memset(&cmdbuf->state, 0, sizeof(cmdbuf->state)); cmdbuf->status = PANVK_CMD_BUFFER_STATUS_RECORDING; return VK_SUCCESS; } void panvk_per_arch(DestroyCommandPool)(VkDevice _device, VkCommandPool commandPool, const VkAllocationCallbacks *pAllocator) { VK_FROM_HANDLE(panvk_device, device, _device); VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf, &pool->active_cmd_buffers, pool_link) panvk_destroy_cmdbuf(cmdbuf); list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf, &pool->free_cmd_buffers, pool_link) panvk_destroy_cmdbuf(cmdbuf); panvk_bo_pool_cleanup(&pool->desc_bo_pool); panvk_bo_pool_cleanup(&pool->varying_bo_pool); panvk_bo_pool_cleanup(&pool->tls_bo_pool); vk_command_pool_finish(&pool->vk); vk_free2(&device->vk.alloc, pAllocator, pool); } VkResult panvk_per_arch(ResetCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags) { VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); VkResult result; list_for_each_entry(struct panvk_cmd_buffer, cmdbuf, &pool->active_cmd_buffers, pool_link) { result = panvk_reset_cmdbuf(cmdbuf); if (result != VK_SUCCESS) return result; } return VK_SUCCESS; } void panvk_per_arch(TrimCommandPool)(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlags flags) { VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool); if (!pool) return; list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf, &pool->free_cmd_buffers, pool_link) panvk_destroy_cmdbuf(cmdbuf); } void panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) { VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); const struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; struct panvk_dispatch_info dispatch = { .wg_count = { x, y, z }, }; panvk_per_arch(cmd_close_batch)(cmdbuf); struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); struct panvk_cmd_bind_point_state *bind_point_state = panvk_cmd_get_bind_point_state(cmdbuf, COMPUTE); struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state; const struct panvk_pipeline *pipeline = bind_point_state->pipeline; struct panfrost_ptr job = pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB); struct panvk_sysvals *sysvals = &desc_state->sysvals; sysvals->num_work_groups.u32[0] = x; sysvals->num_work_groups.u32[1] = y; sysvals->num_work_groups.u32[2] = z; sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x; sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y; sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z; desc_state->sysvals_ptr = 0; panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); dispatch.tsd = batch->tls.gpu; panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state); dispatch.attributes = desc_state->non_vs_attribs; dispatch.attribute_bufs = desc_state->non_vs_attrib_bufs; panvk_cmd_prepare_ubos(cmdbuf, bind_point_state); dispatch.ubos = desc_state->ubos; panvk_cmd_prepare_textures(cmdbuf, bind_point_state); dispatch.textures = desc_state->textures; panvk_cmd_prepare_samplers(cmdbuf, bind_point_state); dispatch.samplers = desc_state->samplers; panvk_per_arch(emit_compute_job)(pipeline, &dispatch, job.cpu); panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard, MALI_JOB_TYPE_COMPUTE, false, false, 0, 0, &job, false); batch->tlsinfo.tls.size = pipeline->tls_size; batch->tlsinfo.wls.size = pipeline->wls_size; if (batch->tlsinfo.wls.size) { batch->wls_total_size 
         = pan_wls_mem_size(pdev, &dispatch.wg_count, batch->tlsinfo.wls.size);
   }

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   desc_state->dirty = 0;
}