1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2019 Raspberry Pi Ltd 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "v3dv_private.h" 25bf215546Sopenharmony_ci#include "util/u_pack_color.h" 26bf215546Sopenharmony_ci#include "vk_util.h" 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_civoid 29bf215546Sopenharmony_civ3dv_job_add_bo(struct v3dv_job *job, struct v3dv_bo *bo) 30bf215546Sopenharmony_ci{ 31bf215546Sopenharmony_ci if (!bo) 32bf215546Sopenharmony_ci return; 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci if (job->bo_handle_mask & bo->handle_bit) { 35bf215546Sopenharmony_ci if (_mesa_set_search(job->bos, bo)) 36bf215546Sopenharmony_ci return; 37bf215546Sopenharmony_ci } 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci _mesa_set_add(job->bos, bo); 40bf215546Sopenharmony_ci job->bo_count++; 41bf215546Sopenharmony_ci job->bo_handle_mask |= bo->handle_bit; 42bf215546Sopenharmony_ci} 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_civoid 45bf215546Sopenharmony_civ3dv_job_add_bo_unchecked(struct v3dv_job *job, struct v3dv_bo *bo) 46bf215546Sopenharmony_ci{ 47bf215546Sopenharmony_ci assert(bo); 48bf215546Sopenharmony_ci _mesa_set_add(job->bos, bo); 49bf215546Sopenharmony_ci job->bo_count++; 50bf215546Sopenharmony_ci job->bo_handle_mask |= bo->handle_bit; 51bf215546Sopenharmony_ci} 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_cistatic void 54bf215546Sopenharmony_cicmd_buffer_init(struct v3dv_cmd_buffer *cmd_buffer, 55bf215546Sopenharmony_ci struct v3dv_device *device) 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci /* Do not reset the base object! If we are calling this from a command 58bf215546Sopenharmony_ci * buffer reset that would reset the loader's dispatch table for the 59bf215546Sopenharmony_ci * command buffer, and any other relevant info from vk_object_base 60bf215546Sopenharmony_ci */ 61bf215546Sopenharmony_ci const uint32_t base_size = sizeof(struct vk_command_buffer); 62bf215546Sopenharmony_ci uint8_t *cmd_buffer_driver_start = ((uint8_t *) cmd_buffer) + base_size; 63bf215546Sopenharmony_ci memset(cmd_buffer_driver_start, 0, sizeof(*cmd_buffer) - base_size); 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci cmd_buffer->device = device; 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci list_inithead(&cmd_buffer->private_objs); 68bf215546Sopenharmony_ci list_inithead(&cmd_buffer->jobs); 69bf215546Sopenharmony_ci list_inithead(&cmd_buffer->list_link); 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx = -1; 72bf215546Sopenharmony_ci cmd_buffer->state.meta.subpass_idx = -1; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_ci cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_INITIALIZED; 75bf215546Sopenharmony_ci} 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_cistatic void cmd_buffer_destroy(struct vk_command_buffer *cmd_buffer); 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_cistatic VkResult 80bf215546Sopenharmony_cicmd_buffer_create(struct v3dv_device *device, 81bf215546Sopenharmony_ci struct vk_command_pool *pool, 82bf215546Sopenharmony_ci VkCommandBufferLevel level, 83bf215546Sopenharmony_ci VkCommandBuffer *pCommandBuffer) 84bf215546Sopenharmony_ci{ 85bf215546Sopenharmony_ci struct v3dv_cmd_buffer *cmd_buffer; 86bf215546Sopenharmony_ci cmd_buffer = vk_zalloc(&pool->alloc, 87bf215546Sopenharmony_ci sizeof(*cmd_buffer), 88bf215546Sopenharmony_ci 8, 89bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 90bf215546Sopenharmony_ci if (cmd_buffer == NULL) 91bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci VkResult result; 94bf215546Sopenharmony_ci result = vk_command_buffer_init(&cmd_buffer->vk, pool, level); 95bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 96bf215546Sopenharmony_ci vk_free(&pool->alloc, cmd_buffer); 97bf215546Sopenharmony_ci return result; 98bf215546Sopenharmony_ci } 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci cmd_buffer->vk.destroy = cmd_buffer_destroy; 101bf215546Sopenharmony_ci cmd_buffer_init(cmd_buffer, device); 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci *pCommandBuffer = v3dv_cmd_buffer_to_handle(cmd_buffer); 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci return VK_SUCCESS; 106bf215546Sopenharmony_ci} 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_cistatic void 109bf215546Sopenharmony_cijob_destroy_gpu_cl_resources(struct v3dv_job *job) 110bf215546Sopenharmony_ci{ 111bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_GPU_CL || 112bf215546Sopenharmony_ci job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY); 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci v3dv_cl_destroy(&job->bcl); 115bf215546Sopenharmony_ci v3dv_cl_destroy(&job->rcl); 116bf215546Sopenharmony_ci v3dv_cl_destroy(&job->indirect); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci /* Since we don't ref BOs when we add them to the command buffer, don't 119bf215546Sopenharmony_ci * unref them here either. Bo's will be freed when their corresponding API 120bf215546Sopenharmony_ci * objects are destroyed. 121bf215546Sopenharmony_ci */ 122bf215546Sopenharmony_ci _mesa_set_destroy(job->bos, NULL); 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci v3dv_bo_free(job->device, job->tile_alloc); 125bf215546Sopenharmony_ci v3dv_bo_free(job->device, job->tile_state); 126bf215546Sopenharmony_ci} 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_cistatic void 129bf215546Sopenharmony_cijob_destroy_cloned_gpu_cl_resources(struct v3dv_job *job) 130bf215546Sopenharmony_ci{ 131bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_GPU_CL); 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci list_for_each_entry_safe(struct v3dv_bo, bo, &job->bcl.bo_list, list_link) { 134bf215546Sopenharmony_ci list_del(&bo->list_link); 135bf215546Sopenharmony_ci vk_free(&job->device->vk.alloc, bo); 136bf215546Sopenharmony_ci } 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci list_for_each_entry_safe(struct v3dv_bo, bo, &job->rcl.bo_list, list_link) { 139bf215546Sopenharmony_ci list_del(&bo->list_link); 140bf215546Sopenharmony_ci vk_free(&job->device->vk.alloc, bo); 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci list_for_each_entry_safe(struct v3dv_bo, bo, &job->indirect.bo_list, list_link) { 144bf215546Sopenharmony_ci list_del(&bo->list_link); 145bf215546Sopenharmony_ci vk_free(&job->device->vk.alloc, bo); 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci} 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_cistatic void 150bf215546Sopenharmony_cijob_destroy_gpu_csd_resources(struct v3dv_job *job) 151bf215546Sopenharmony_ci{ 152bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_GPU_CSD); 153bf215546Sopenharmony_ci assert(job->cmd_buffer); 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci v3dv_cl_destroy(&job->indirect); 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci _mesa_set_destroy(job->bos, NULL); 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci if (job->csd.shared_memory) 160bf215546Sopenharmony_ci v3dv_bo_free(job->device, job->csd.shared_memory); 161bf215546Sopenharmony_ci} 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_cistatic void 164bf215546Sopenharmony_cijob_destroy_cpu_wait_events_resources(struct v3dv_job *job) 165bf215546Sopenharmony_ci{ 166bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_CPU_WAIT_EVENTS); 167bf215546Sopenharmony_ci assert(job->cmd_buffer); 168bf215546Sopenharmony_ci vk_free(&job->cmd_buffer->device->vk.alloc, job->cpu.event_wait.events); 169bf215546Sopenharmony_ci} 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_civoid 172bf215546Sopenharmony_civ3dv_job_destroy(struct v3dv_job *job) 173bf215546Sopenharmony_ci{ 174bf215546Sopenharmony_ci assert(job); 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci list_del(&job->list_link); 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci /* Cloned jobs don't make deep copies of the original jobs, so they don't 179bf215546Sopenharmony_ci * own any of their resources. However, they do allocate clones of BO 180bf215546Sopenharmony_ci * structs, so make sure we free those. 181bf215546Sopenharmony_ci */ 182bf215546Sopenharmony_ci if (!job->is_clone) { 183bf215546Sopenharmony_ci switch (job->type) { 184bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_CL: 185bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_CL_SECONDARY: 186bf215546Sopenharmony_ci job_destroy_gpu_cl_resources(job); 187bf215546Sopenharmony_ci break; 188bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_CSD: 189bf215546Sopenharmony_ci job_destroy_gpu_csd_resources(job); 190bf215546Sopenharmony_ci break; 191bf215546Sopenharmony_ci case V3DV_JOB_TYPE_CPU_WAIT_EVENTS: 192bf215546Sopenharmony_ci job_destroy_cpu_wait_events_resources(job); 193bf215546Sopenharmony_ci break; 194bf215546Sopenharmony_ci default: 195bf215546Sopenharmony_ci break; 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci } else { 198bf215546Sopenharmony_ci /* Cloned jobs */ 199bf215546Sopenharmony_ci if (job->type == V3DV_JOB_TYPE_GPU_CL) 200bf215546Sopenharmony_ci job_destroy_cloned_gpu_cl_resources(job); 201bf215546Sopenharmony_ci } 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci vk_free(&job->device->vk.alloc, job); 204bf215546Sopenharmony_ci} 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_civoid 207bf215546Sopenharmony_civ3dv_cmd_buffer_add_private_obj(struct v3dv_cmd_buffer *cmd_buffer, 208bf215546Sopenharmony_ci uint64_t obj, 209bf215546Sopenharmony_ci v3dv_cmd_buffer_private_obj_destroy_cb destroy_cb) 210bf215546Sopenharmony_ci{ 211bf215546Sopenharmony_ci struct v3dv_cmd_buffer_private_obj *pobj = 212bf215546Sopenharmony_ci vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(*pobj), 8, 213bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 214bf215546Sopenharmony_ci if (!pobj) { 215bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 216bf215546Sopenharmony_ci return; 217bf215546Sopenharmony_ci } 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_ci pobj->obj = obj; 220bf215546Sopenharmony_ci pobj->destroy_cb = destroy_cb; 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci list_addtail(&pobj->list_link, &cmd_buffer->private_objs); 223bf215546Sopenharmony_ci} 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_cistatic void 226bf215546Sopenharmony_cicmd_buffer_destroy_private_obj(struct v3dv_cmd_buffer *cmd_buffer, 227bf215546Sopenharmony_ci struct v3dv_cmd_buffer_private_obj *pobj) 228bf215546Sopenharmony_ci{ 229bf215546Sopenharmony_ci assert(pobj && pobj->obj && pobj->destroy_cb); 230bf215546Sopenharmony_ci pobj->destroy_cb(v3dv_device_to_handle(cmd_buffer->device), 231bf215546Sopenharmony_ci pobj->obj, 232bf215546Sopenharmony_ci &cmd_buffer->device->vk.alloc); 233bf215546Sopenharmony_ci list_del(&pobj->list_link); 234bf215546Sopenharmony_ci vk_free(&cmd_buffer->device->vk.alloc, pobj); 235bf215546Sopenharmony_ci} 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_cistatic void 238bf215546Sopenharmony_cicmd_buffer_free_resources(struct v3dv_cmd_buffer *cmd_buffer) 239bf215546Sopenharmony_ci{ 240bf215546Sopenharmony_ci list_for_each_entry_safe(struct v3dv_job, job, 241bf215546Sopenharmony_ci &cmd_buffer->jobs, list_link) { 242bf215546Sopenharmony_ci v3dv_job_destroy(job); 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci if (cmd_buffer->state.job) 246bf215546Sopenharmony_ci v3dv_job_destroy(cmd_buffer->state.job); 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci if (cmd_buffer->state.attachments) 249bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer->state.attachments); 250bf215546Sopenharmony_ci 251bf215546Sopenharmony_ci if (cmd_buffer->state.query.end.alloc_count > 0) 252bf215546Sopenharmony_ci vk_free(&cmd_buffer->device->vk.alloc, cmd_buffer->state.query.end.states); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci if (cmd_buffer->push_constants_resource.bo) 255bf215546Sopenharmony_ci v3dv_bo_free(cmd_buffer->device, cmd_buffer->push_constants_resource.bo); 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci list_for_each_entry_safe(struct v3dv_cmd_buffer_private_obj, pobj, 258bf215546Sopenharmony_ci &cmd_buffer->private_objs, list_link) { 259bf215546Sopenharmony_ci cmd_buffer_destroy_private_obj(cmd_buffer, pobj); 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci if (cmd_buffer->state.meta.attachments) { 263bf215546Sopenharmony_ci assert(cmd_buffer->state.meta.attachment_alloc_count > 0); 264bf215546Sopenharmony_ci vk_free(&cmd_buffer->device->vk.alloc, cmd_buffer->state.meta.attachments); 265bf215546Sopenharmony_ci } 266bf215546Sopenharmony_ci} 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_cistatic void 269bf215546Sopenharmony_cicmd_buffer_destroy(struct vk_command_buffer *vk_cmd_buffer) 270bf215546Sopenharmony_ci{ 271bf215546Sopenharmony_ci struct v3dv_cmd_buffer *cmd_buffer = 272bf215546Sopenharmony_ci container_of(vk_cmd_buffer, struct v3dv_cmd_buffer, vk); 273bf215546Sopenharmony_ci 274bf215546Sopenharmony_ci cmd_buffer_free_resources(cmd_buffer); 275bf215546Sopenharmony_ci vk_command_buffer_finish(&cmd_buffer->vk); 276bf215546Sopenharmony_ci vk_free(&cmd_buffer->vk.pool->alloc, cmd_buffer); 277bf215546Sopenharmony_ci} 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_cistatic bool 280bf215546Sopenharmony_cicmd_buffer_can_merge_subpass(struct v3dv_cmd_buffer *cmd_buffer, 281bf215546Sopenharmony_ci uint32_t subpass_idx) 282bf215546Sopenharmony_ci{ 283bf215546Sopenharmony_ci const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 284bf215546Sopenharmony_ci assert(state->pass); 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci const struct v3dv_physical_device *physical_device = 287bf215546Sopenharmony_ci &cmd_buffer->device->instance->physicalDevice; 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_ci if (cmd_buffer->vk.level != VK_COMMAND_BUFFER_LEVEL_PRIMARY) 290bf215546Sopenharmony_ci return false; 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci if (!cmd_buffer->state.job) 293bf215546Sopenharmony_ci return false; 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci if (cmd_buffer->state.job->always_flush) 296bf215546Sopenharmony_ci return false; 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci if (!physical_device->options.merge_jobs) 299bf215546Sopenharmony_ci return false; 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci /* Each render pass starts a new job */ 302bf215546Sopenharmony_ci if (subpass_idx == 0) 303bf215546Sopenharmony_ci return false; 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_ci /* Two subpasses can be merged in the same job if we can emit a single RCL 306bf215546Sopenharmony_ci * for them (since the RCL includes the END_OF_RENDERING command that 307bf215546Sopenharmony_ci * triggers the "render job finished" interrupt). We can do this so long 308bf215546Sopenharmony_ci * as both subpasses render against the same attachments. 309bf215546Sopenharmony_ci */ 310bf215546Sopenharmony_ci assert(state->subpass_idx == subpass_idx - 1); 311bf215546Sopenharmony_ci struct v3dv_subpass *prev_subpass = &state->pass->subpasses[state->subpass_idx]; 312bf215546Sopenharmony_ci struct v3dv_subpass *subpass = &state->pass->subpasses[subpass_idx]; 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci if (subpass->ds_attachment.attachment != 315bf215546Sopenharmony_ci prev_subpass->ds_attachment.attachment) 316bf215546Sopenharmony_ci return false; 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_ci if (subpass->color_count != prev_subpass->color_count) 319bf215546Sopenharmony_ci return false; 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; i++) { 322bf215546Sopenharmony_ci if (subpass->color_attachments[i].attachment != 323bf215546Sopenharmony_ci prev_subpass->color_attachments[i].attachment) { 324bf215546Sopenharmony_ci return false; 325bf215546Sopenharmony_ci } 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci /* Don't merge if the subpasses have different view masks, since in that 329bf215546Sopenharmony_ci * case the framebuffer setup is different and we need to emit different 330bf215546Sopenharmony_ci * RCLs. 331bf215546Sopenharmony_ci */ 332bf215546Sopenharmony_ci if (subpass->view_mask != prev_subpass->view_mask) 333bf215546Sopenharmony_ci return false; 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci /* FIXME: Since some attachment formats can't be resolved using the TLB we 336bf215546Sopenharmony_ci * need to emit separate resolve jobs for them and that would not be 337bf215546Sopenharmony_ci * compatible with subpass merges. We could fix that by testing if any of 338bf215546Sopenharmony_ci * the attachments to resolve doesn't support TLB resolves. 339bf215546Sopenharmony_ci */ 340bf215546Sopenharmony_ci if (prev_subpass->resolve_attachments || subpass->resolve_attachments || 341bf215546Sopenharmony_ci prev_subpass->resolve_depth || prev_subpass->resolve_stencil || 342bf215546Sopenharmony_ci subpass->resolve_depth || subpass->resolve_stencil) { 343bf215546Sopenharmony_ci return false; 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci return true; 347bf215546Sopenharmony_ci} 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci/** 350bf215546Sopenharmony_ci * Computes and sets the job frame tiling information required to setup frame 351bf215546Sopenharmony_ci * binning and rendering. 352bf215546Sopenharmony_ci */ 353bf215546Sopenharmony_cistatic struct v3dv_frame_tiling * 354bf215546Sopenharmony_cijob_compute_frame_tiling(struct v3dv_job *job, 355bf215546Sopenharmony_ci uint32_t width, 356bf215546Sopenharmony_ci uint32_t height, 357bf215546Sopenharmony_ci uint32_t layers, 358bf215546Sopenharmony_ci uint32_t render_target_count, 359bf215546Sopenharmony_ci uint8_t max_internal_bpp, 360bf215546Sopenharmony_ci bool msaa) 361bf215546Sopenharmony_ci{ 362bf215546Sopenharmony_ci assert(job); 363bf215546Sopenharmony_ci struct v3dv_frame_tiling *tiling = &job->frame_tiling; 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_ci tiling->width = width; 366bf215546Sopenharmony_ci tiling->height = height; 367bf215546Sopenharmony_ci tiling->layers = layers; 368bf215546Sopenharmony_ci tiling->render_target_count = render_target_count; 369bf215546Sopenharmony_ci tiling->msaa = msaa; 370bf215546Sopenharmony_ci tiling->internal_bpp = max_internal_bpp; 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci /* We can use double-buffer when MSAA is disabled to reduce tile store 373bf215546Sopenharmony_ci * overhead. 374bf215546Sopenharmony_ci * 375bf215546Sopenharmony_ci * FIXME: if we are emitting any tile loads the hardware will serialize 376bf215546Sopenharmony_ci * loads and stores across tiles effectivley disabling double buffering, 377bf215546Sopenharmony_ci * so we would want to check for that and not enable it in that case to 378bf215546Sopenharmony_ci * avoid reducing the tile size. 379bf215546Sopenharmony_ci */ 380bf215546Sopenharmony_ci tiling->double_buffer = 381bf215546Sopenharmony_ci unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa; 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci assert(!tiling->msaa || !tiling->double_buffer); 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci v3d_choose_tile_size(render_target_count, max_internal_bpp, 386bf215546Sopenharmony_ci tiling->msaa, tiling->double_buffer, 387bf215546Sopenharmony_ci &tiling->tile_width, &tiling->tile_height); 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci tiling->draw_tiles_x = DIV_ROUND_UP(width, tiling->tile_width); 390bf215546Sopenharmony_ci tiling->draw_tiles_y = DIV_ROUND_UP(height, tiling->tile_height); 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_ci /* Size up our supertiles until we get under the limit */ 393bf215546Sopenharmony_ci const uint32_t max_supertiles = 256; 394bf215546Sopenharmony_ci tiling->supertile_width = 1; 395bf215546Sopenharmony_ci tiling->supertile_height = 1; 396bf215546Sopenharmony_ci for (;;) { 397bf215546Sopenharmony_ci tiling->frame_width_in_supertiles = 398bf215546Sopenharmony_ci DIV_ROUND_UP(tiling->draw_tiles_x, tiling->supertile_width); 399bf215546Sopenharmony_ci tiling->frame_height_in_supertiles = 400bf215546Sopenharmony_ci DIV_ROUND_UP(tiling->draw_tiles_y, tiling->supertile_height); 401bf215546Sopenharmony_ci const uint32_t num_supertiles = tiling->frame_width_in_supertiles * 402bf215546Sopenharmony_ci tiling->frame_height_in_supertiles; 403bf215546Sopenharmony_ci if (num_supertiles < max_supertiles) 404bf215546Sopenharmony_ci break; 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci if (tiling->supertile_width < tiling->supertile_height) 407bf215546Sopenharmony_ci tiling->supertile_width++; 408bf215546Sopenharmony_ci else 409bf215546Sopenharmony_ci tiling->supertile_height++; 410bf215546Sopenharmony_ci } 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci return tiling; 413bf215546Sopenharmony_ci} 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_civoid 416bf215546Sopenharmony_civ3dv_job_start_frame(struct v3dv_job *job, 417bf215546Sopenharmony_ci uint32_t width, 418bf215546Sopenharmony_ci uint32_t height, 419bf215546Sopenharmony_ci uint32_t layers, 420bf215546Sopenharmony_ci bool allocate_tile_state_for_all_layers, 421bf215546Sopenharmony_ci uint32_t render_target_count, 422bf215546Sopenharmony_ci uint8_t max_internal_bpp, 423bf215546Sopenharmony_ci bool msaa) 424bf215546Sopenharmony_ci{ 425bf215546Sopenharmony_ci assert(job); 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci /* Start by computing frame tiling spec for this job */ 428bf215546Sopenharmony_ci const struct v3dv_frame_tiling *tiling = 429bf215546Sopenharmony_ci job_compute_frame_tiling(job, 430bf215546Sopenharmony_ci width, height, layers, 431bf215546Sopenharmony_ci render_target_count, max_internal_bpp, msaa); 432bf215546Sopenharmony_ci 433bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, 256); 434bf215546Sopenharmony_ci v3dv_return_if_oom(NULL, job); 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci /* We only need to allocate tile state for all layers if the binner 437bf215546Sopenharmony_ci * writes primitives to layers other than the first. This can only be 438bf215546Sopenharmony_ci * done using layered rendering (writing gl_Layer from a geometry shader), 439bf215546Sopenharmony_ci * so for other cases of multilayered framebuffers (typically with 440bf215546Sopenharmony_ci * meta copy/clear operations) that won't use layered rendering, we only 441bf215546Sopenharmony_ci * need one layer worth of of tile state for the binner. 442bf215546Sopenharmony_ci */ 443bf215546Sopenharmony_ci if (!allocate_tile_state_for_all_layers) 444bf215546Sopenharmony_ci layers = 1; 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci /* The PTB will request the tile alloc initial size per tile at start 447bf215546Sopenharmony_ci * of tile binning. 448bf215546Sopenharmony_ci */ 449bf215546Sopenharmony_ci uint32_t tile_alloc_size = 64 * tiling->layers * 450bf215546Sopenharmony_ci tiling->draw_tiles_x * 451bf215546Sopenharmony_ci tiling->draw_tiles_y; 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci /* The PTB allocates in aligned 4k chunks after the initial setup. */ 454bf215546Sopenharmony_ci tile_alloc_size = align(tile_alloc_size, 4096); 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci /* Include the first two chunk allocations that the PTB does so that 457bf215546Sopenharmony_ci * we definitely clear the OOM condition before triggering one (the HW 458bf215546Sopenharmony_ci * won't trigger OOM during the first allocations). 459bf215546Sopenharmony_ci */ 460bf215546Sopenharmony_ci tile_alloc_size += 8192; 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci /* For performance, allocate some extra initial memory after the PTB's 463bf215546Sopenharmony_ci * minimal allocations, so that we hopefully don't have to block the 464bf215546Sopenharmony_ci * GPU on the kernel handling an OOM signal. 465bf215546Sopenharmony_ci */ 466bf215546Sopenharmony_ci tile_alloc_size += 512 * 1024; 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci job->tile_alloc = v3dv_bo_alloc(job->device, tile_alloc_size, 469bf215546Sopenharmony_ci "tile_alloc", true); 470bf215546Sopenharmony_ci if (!job->tile_alloc) { 471bf215546Sopenharmony_ci v3dv_flag_oom(NULL, job); 472bf215546Sopenharmony_ci return; 473bf215546Sopenharmony_ci } 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci v3dv_job_add_bo_unchecked(job, job->tile_alloc); 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci const uint32_t tsda_per_tile_size = 256; 478bf215546Sopenharmony_ci const uint32_t tile_state_size = tiling->layers * 479bf215546Sopenharmony_ci tiling->draw_tiles_x * 480bf215546Sopenharmony_ci tiling->draw_tiles_y * 481bf215546Sopenharmony_ci tsda_per_tile_size; 482bf215546Sopenharmony_ci job->tile_state = v3dv_bo_alloc(job->device, tile_state_size, "TSDA", true); 483bf215546Sopenharmony_ci if (!job->tile_state) { 484bf215546Sopenharmony_ci v3dv_flag_oom(NULL, job); 485bf215546Sopenharmony_ci return; 486bf215546Sopenharmony_ci } 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci v3dv_job_add_bo_unchecked(job, job->tile_state); 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci v3dv_X(job->device, job_emit_binning_prolog)(job, tiling, layers); 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci job->ez_state = V3D_EZ_UNDECIDED; 493bf215546Sopenharmony_ci job->first_ez_state = V3D_EZ_UNDECIDED; 494bf215546Sopenharmony_ci} 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_cistatic void 497bf215546Sopenharmony_cicmd_buffer_end_render_pass_frame(struct v3dv_cmd_buffer *cmd_buffer) 498bf215546Sopenharmony_ci{ 499bf215546Sopenharmony_ci assert(cmd_buffer->state.job); 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci /* Typically, we have a single job for each subpass and we emit the job's RCL 502bf215546Sopenharmony_ci * here when we are ending the frame for the subpass. However, some commands 503bf215546Sopenharmony_ci * such as vkCmdClearAttachments need to run in their own separate job and 504bf215546Sopenharmony_ci * they emit their own RCL even if they execute inside a subpass. In this 505bf215546Sopenharmony_ci * scenario, we don't want to emit subpass RCL when we end the frame for 506bf215546Sopenharmony_ci * those jobs, so we only emit the subpass RCL if the job has not recorded 507bf215546Sopenharmony_ci * any RCL commands of its own. 508bf215546Sopenharmony_ci */ 509bf215546Sopenharmony_ci if (v3dv_cl_offset(&cmd_buffer->state.job->rcl) == 0) 510bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_emit_render_pass_rcl)(cmd_buffer); 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, job_emit_binning_flush)(cmd_buffer->state.job); 513bf215546Sopenharmony_ci} 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_cistruct v3dv_job * 516bf215546Sopenharmony_civ3dv_cmd_buffer_create_cpu_job(struct v3dv_device *device, 517bf215546Sopenharmony_ci enum v3dv_job_type type, 518bf215546Sopenharmony_ci struct v3dv_cmd_buffer *cmd_buffer, 519bf215546Sopenharmony_ci uint32_t subpass_idx) 520bf215546Sopenharmony_ci{ 521bf215546Sopenharmony_ci struct v3dv_job *job = vk_zalloc(&device->vk.alloc, 522bf215546Sopenharmony_ci sizeof(struct v3dv_job), 8, 523bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 524bf215546Sopenharmony_ci if (!job) { 525bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 526bf215546Sopenharmony_ci return NULL; 527bf215546Sopenharmony_ci } 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_ci v3dv_job_init(job, type, device, cmd_buffer, subpass_idx); 530bf215546Sopenharmony_ci return job; 531bf215546Sopenharmony_ci} 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_cistatic void 534bf215546Sopenharmony_cicmd_buffer_add_cpu_jobs_for_pending_state(struct v3dv_cmd_buffer *cmd_buffer) 535bf215546Sopenharmony_ci{ 536bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_ci if (state->query.end.used_count > 0) { 539bf215546Sopenharmony_ci const uint32_t query_count = state->query.end.used_count; 540bf215546Sopenharmony_ci for (uint32_t i = 0; i < query_count; i++) { 541bf215546Sopenharmony_ci assert(i < state->query.end.used_count); 542bf215546Sopenharmony_ci struct v3dv_job *job = 543bf215546Sopenharmony_ci v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 544bf215546Sopenharmony_ci V3DV_JOB_TYPE_CPU_END_QUERY, 545bf215546Sopenharmony_ci cmd_buffer, -1); 546bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci job->cpu.query_end = state->query.end.states[i]; 549bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 550bf215546Sopenharmony_ci } 551bf215546Sopenharmony_ci } 552bf215546Sopenharmony_ci} 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_civoid 555bf215546Sopenharmony_civ3dv_cmd_buffer_finish_job(struct v3dv_cmd_buffer *cmd_buffer) 556bf215546Sopenharmony_ci{ 557bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 558bf215546Sopenharmony_ci if (!job) 559bf215546Sopenharmony_ci return; 560bf215546Sopenharmony_ci 561bf215546Sopenharmony_ci /* Always clear BCL state after a job has been finished if we don't have 562bf215546Sopenharmony_ci * a pending graphics barrier that could consume it (BCL barriers only 563bf215546Sopenharmony_ci * apply to graphics jobs). This can happen if the application recorded 564bf215546Sopenharmony_ci * a barrier involving geometry stages but none of the draw calls in the 565bf215546Sopenharmony_ci * job actually required a binning sync. 566bf215546Sopenharmony_ci */ 567bf215546Sopenharmony_ci if (!(cmd_buffer->state.barrier.dst_mask & V3DV_BARRIER_GRAPHICS_BIT)) { 568bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_buffer_access = 0; 569bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_image_access = 0; 570bf215546Sopenharmony_ci } 571bf215546Sopenharmony_ci 572bf215546Sopenharmony_ci if (cmd_buffer->state.oom) { 573bf215546Sopenharmony_ci v3dv_job_destroy(job); 574bf215546Sopenharmony_ci cmd_buffer->state.job = NULL; 575bf215546Sopenharmony_ci return; 576bf215546Sopenharmony_ci } 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci /* If we have created a job for a command buffer then we should have 579bf215546Sopenharmony_ci * recorded something into it: if the job was started in a render pass, it 580bf215546Sopenharmony_ci * should at least have the start frame commands, otherwise, it should have 581bf215546Sopenharmony_ci * a transfer command. The only exception are secondary command buffers 582bf215546Sopenharmony_ci * inside a render pass. 583bf215546Sopenharmony_ci */ 584bf215546Sopenharmony_ci assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY || 585bf215546Sopenharmony_ci v3dv_cl_offset(&job->bcl) > 0); 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci /* When we merge multiple subpasses into the same job we must only emit one 588bf215546Sopenharmony_ci * RCL, so we do that here, when we decided that we need to finish the job. 589bf215546Sopenharmony_ci * Any rendering that happens outside a render pass is never merged, so 590bf215546Sopenharmony_ci * the RCL should have been emitted by the time we got here. 591bf215546Sopenharmony_ci */ 592bf215546Sopenharmony_ci assert(v3dv_cl_offset(&job->rcl) != 0 || cmd_buffer->state.pass); 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci /* If we are finishing a job inside a render pass we have two scenarios: 595bf215546Sopenharmony_ci * 596bf215546Sopenharmony_ci * 1. It is a regular CL, in which case we will submit the job to the GPU, 597bf215546Sopenharmony_ci * so we may need to generate an RCL and add a binning flush. 598bf215546Sopenharmony_ci * 599bf215546Sopenharmony_ci * 2. It is a partial CL recorded in a secondary command buffer, in which 600bf215546Sopenharmony_ci * case we are not submitting it directly to the GPU but rather branch to 601bf215546Sopenharmony_ci * it from a primary command buffer. In this case we just want to end 602bf215546Sopenharmony_ci * the BCL with a RETURN_FROM_SUB_LIST and the RCL and binning flush 603bf215546Sopenharmony_ci * will be the primary job that branches to this CL. 604bf215546Sopenharmony_ci */ 605bf215546Sopenharmony_ci if (cmd_buffer->state.pass) { 606bf215546Sopenharmony_ci if (job->type == V3DV_JOB_TYPE_GPU_CL) { 607bf215546Sopenharmony_ci cmd_buffer_end_render_pass_frame(cmd_buffer); 608bf215546Sopenharmony_ci } else { 609bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY); 610bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_end_render_pass_secondary)(cmd_buffer); 611bf215546Sopenharmony_ci } 612bf215546Sopenharmony_ci } 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 615bf215546Sopenharmony_ci cmd_buffer->state.job = NULL; 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_ci /* If we have recorded any state with this last GPU job that requires to 618bf215546Sopenharmony_ci * emit CPU jobs after the job is completed, add them now. The only 619bf215546Sopenharmony_ci * exception is secondary command buffers inside a render pass, because in 620bf215546Sopenharmony_ci * that case we want to defer this until we finish recording the primary 621bf215546Sopenharmony_ci * job into which we execute the secondary. 622bf215546Sopenharmony_ci */ 623bf215546Sopenharmony_ci if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY || 624bf215546Sopenharmony_ci !cmd_buffer->state.pass) { 625bf215546Sopenharmony_ci cmd_buffer_add_cpu_jobs_for_pending_state(cmd_buffer); 626bf215546Sopenharmony_ci } 627bf215546Sopenharmony_ci} 628bf215546Sopenharmony_ci 629bf215546Sopenharmony_cibool 630bf215546Sopenharmony_civ3dv_job_type_is_gpu(struct v3dv_job *job) 631bf215546Sopenharmony_ci{ 632bf215546Sopenharmony_ci switch (job->type) { 633bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_CL: 634bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_CL_SECONDARY: 635bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_TFU: 636bf215546Sopenharmony_ci case V3DV_JOB_TYPE_GPU_CSD: 637bf215546Sopenharmony_ci return true; 638bf215546Sopenharmony_ci default: 639bf215546Sopenharmony_ci return false; 640bf215546Sopenharmony_ci } 641bf215546Sopenharmony_ci} 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_cistatic void 644bf215546Sopenharmony_cicmd_buffer_serialize_job_if_needed(struct v3dv_cmd_buffer *cmd_buffer, 645bf215546Sopenharmony_ci struct v3dv_job *job) 646bf215546Sopenharmony_ci{ 647bf215546Sopenharmony_ci assert(cmd_buffer && job); 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci /* Serialization only affects GPU jobs, CPU jobs are always automatically 650bf215546Sopenharmony_ci * serialized. 651bf215546Sopenharmony_ci */ 652bf215546Sopenharmony_ci if (!v3dv_job_type_is_gpu(job)) 653bf215546Sopenharmony_ci return; 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_ci uint8_t barrier_mask = cmd_buffer->state.barrier.dst_mask; 656bf215546Sopenharmony_ci if (barrier_mask == 0) 657bf215546Sopenharmony_ci return; 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci uint8_t bit = 0; 660bf215546Sopenharmony_ci uint8_t *src_mask; 661bf215546Sopenharmony_ci if (job->type == V3DV_JOB_TYPE_GPU_CSD) { 662bf215546Sopenharmony_ci assert(!job->is_transfer); 663bf215546Sopenharmony_ci bit = V3DV_BARRIER_COMPUTE_BIT; 664bf215546Sopenharmony_ci src_mask = &cmd_buffer->state.barrier.src_mask_compute; 665bf215546Sopenharmony_ci } else if (job->is_transfer) { 666bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_GPU_CL || 667bf215546Sopenharmony_ci job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY || 668bf215546Sopenharmony_ci job->type == V3DV_JOB_TYPE_GPU_TFU); 669bf215546Sopenharmony_ci bit = V3DV_BARRIER_TRANSFER_BIT; 670bf215546Sopenharmony_ci src_mask = &cmd_buffer->state.barrier.src_mask_transfer; 671bf215546Sopenharmony_ci } else { 672bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_GPU_CL || 673bf215546Sopenharmony_ci job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY); 674bf215546Sopenharmony_ci bit = V3DV_BARRIER_GRAPHICS_BIT; 675bf215546Sopenharmony_ci src_mask = &cmd_buffer->state.barrier.src_mask_graphics; 676bf215546Sopenharmony_ci } 677bf215546Sopenharmony_ci 678bf215546Sopenharmony_ci if (barrier_mask & bit) { 679bf215546Sopenharmony_ci job->serialize = *src_mask; 680bf215546Sopenharmony_ci *src_mask = 0; 681bf215546Sopenharmony_ci cmd_buffer->state.barrier.dst_mask &= ~bit; 682bf215546Sopenharmony_ci } 683bf215546Sopenharmony_ci} 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_civoid 686bf215546Sopenharmony_civ3dv_job_init(struct v3dv_job *job, 687bf215546Sopenharmony_ci enum v3dv_job_type type, 688bf215546Sopenharmony_ci struct v3dv_device *device, 689bf215546Sopenharmony_ci struct v3dv_cmd_buffer *cmd_buffer, 690bf215546Sopenharmony_ci int32_t subpass_idx) 691bf215546Sopenharmony_ci{ 692bf215546Sopenharmony_ci assert(job); 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_ci /* Make sure we haven't made this new job current before calling here */ 695bf215546Sopenharmony_ci assert(!cmd_buffer || cmd_buffer->state.job != job); 696bf215546Sopenharmony_ci 697bf215546Sopenharmony_ci job->type = type; 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci job->device = device; 700bf215546Sopenharmony_ci job->cmd_buffer = cmd_buffer; 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci list_inithead(&job->list_link); 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci if (type == V3DV_JOB_TYPE_GPU_CL || 705bf215546Sopenharmony_ci type == V3DV_JOB_TYPE_GPU_CL_SECONDARY || 706bf215546Sopenharmony_ci type == V3DV_JOB_TYPE_GPU_CSD) { 707bf215546Sopenharmony_ci job->bos = 708bf215546Sopenharmony_ci _mesa_set_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal); 709bf215546Sopenharmony_ci job->bo_count = 0; 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci v3dv_cl_init(job, &job->indirect); 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci if (unlikely(V3D_DEBUG & V3D_DEBUG_ALWAYS_FLUSH)) 714bf215546Sopenharmony_ci job->always_flush = true; 715bf215546Sopenharmony_ci } 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci if (type == V3DV_JOB_TYPE_GPU_CL || 718bf215546Sopenharmony_ci type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) { 719bf215546Sopenharmony_ci v3dv_cl_init(job, &job->bcl); 720bf215546Sopenharmony_ci v3dv_cl_init(job, &job->rcl); 721bf215546Sopenharmony_ci } 722bf215546Sopenharmony_ci 723bf215546Sopenharmony_ci if (cmd_buffer) { 724bf215546Sopenharmony_ci /* Flag all state as dirty. Generally, we need to re-emit state for each 725bf215546Sopenharmony_ci * new job. 726bf215546Sopenharmony_ci * 727bf215546Sopenharmony_ci * FIXME: there may be some exceptions, in which case we could skip some 728bf215546Sopenharmony_ci * bits. 729bf215546Sopenharmony_ci */ 730bf215546Sopenharmony_ci cmd_buffer->state.dirty = ~0; 731bf215546Sopenharmony_ci cmd_buffer->state.dirty_descriptor_stages = ~0; 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci /* Honor inheritance of occlussion queries in secondaries if requested */ 734bf215546Sopenharmony_ci if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && 735bf215546Sopenharmony_ci cmd_buffer->state.inheritance.occlusion_query_enable) { 736bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY; 737bf215546Sopenharmony_ci } 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci /* Keep track of the first subpass that we are recording in this new job. 740bf215546Sopenharmony_ci * We will use this when we emit the RCL to decide how to emit our loads 741bf215546Sopenharmony_ci * and stores. 742bf215546Sopenharmony_ci */ 743bf215546Sopenharmony_ci if (cmd_buffer->state.pass) 744bf215546Sopenharmony_ci job->first_subpass = subpass_idx; 745bf215546Sopenharmony_ci 746bf215546Sopenharmony_ci job->is_transfer = cmd_buffer->state.is_transfer; 747bf215546Sopenharmony_ci 748bf215546Sopenharmony_ci cmd_buffer_serialize_job_if_needed(cmd_buffer, job); 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci job->perf = cmd_buffer->state.query.active_query.perf; 751bf215546Sopenharmony_ci } 752bf215546Sopenharmony_ci} 753bf215546Sopenharmony_ci 754bf215546Sopenharmony_cistruct v3dv_job * 755bf215546Sopenharmony_civ3dv_cmd_buffer_start_job(struct v3dv_cmd_buffer *cmd_buffer, 756bf215546Sopenharmony_ci int32_t subpass_idx, 757bf215546Sopenharmony_ci enum v3dv_job_type type) 758bf215546Sopenharmony_ci{ 759bf215546Sopenharmony_ci /* Don't create a new job if we can merge the current subpass into 760bf215546Sopenharmony_ci * the current job. 761bf215546Sopenharmony_ci */ 762bf215546Sopenharmony_ci if (cmd_buffer->state.pass && 763bf215546Sopenharmony_ci subpass_idx != -1 && 764bf215546Sopenharmony_ci cmd_buffer_can_merge_subpass(cmd_buffer, subpass_idx)) { 765bf215546Sopenharmony_ci cmd_buffer->state.job->is_subpass_finish = false; 766bf215546Sopenharmony_ci return cmd_buffer->state.job; 767bf215546Sopenharmony_ci } 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci /* Ensure we are not starting a new job without finishing a previous one */ 770bf215546Sopenharmony_ci if (cmd_buffer->state.job != NULL) 771bf215546Sopenharmony_ci v3dv_cmd_buffer_finish_job(cmd_buffer); 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci assert(cmd_buffer->state.job == NULL); 774bf215546Sopenharmony_ci struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc, 775bf215546Sopenharmony_ci sizeof(struct v3dv_job), 8, 776bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 777bf215546Sopenharmony_ci 778bf215546Sopenharmony_ci if (!job) { 779bf215546Sopenharmony_ci fprintf(stderr, "Error: failed to allocate CPU memory for job\n"); 780bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 781bf215546Sopenharmony_ci return NULL; 782bf215546Sopenharmony_ci } 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci v3dv_job_init(job, type, cmd_buffer->device, cmd_buffer, subpass_idx); 785bf215546Sopenharmony_ci cmd_buffer->state.job = job; 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_ci return job; 788bf215546Sopenharmony_ci} 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_cistatic VkResult 791bf215546Sopenharmony_cicmd_buffer_reset(struct v3dv_cmd_buffer *cmd_buffer, 792bf215546Sopenharmony_ci VkCommandBufferResetFlags flags) 793bf215546Sopenharmony_ci{ 794bf215546Sopenharmony_ci vk_command_buffer_reset(&cmd_buffer->vk); 795bf215546Sopenharmony_ci if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_INITIALIZED) { 796bf215546Sopenharmony_ci struct v3dv_device *device = cmd_buffer->device; 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci /* FIXME: For now we always free all resources as if 799bf215546Sopenharmony_ci * VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT was set. 800bf215546Sopenharmony_ci */ 801bf215546Sopenharmony_ci if (cmd_buffer->status != V3DV_CMD_BUFFER_STATUS_NEW) 802bf215546Sopenharmony_ci cmd_buffer_free_resources(cmd_buffer); 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_ci cmd_buffer_init(cmd_buffer, device); 805bf215546Sopenharmony_ci } 806bf215546Sopenharmony_ci 807bf215546Sopenharmony_ci assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_INITIALIZED); 808bf215546Sopenharmony_ci return VK_SUCCESS; 809bf215546Sopenharmony_ci} 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 812bf215546Sopenharmony_civ3dv_AllocateCommandBuffers(VkDevice _device, 813bf215546Sopenharmony_ci const VkCommandBufferAllocateInfo *pAllocateInfo, 814bf215546Sopenharmony_ci VkCommandBuffer *pCommandBuffers) 815bf215546Sopenharmony_ci{ 816bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_device, device, _device); 817bf215546Sopenharmony_ci VK_FROM_HANDLE(vk_command_pool, pool, pAllocateInfo->commandPool); 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 820bf215546Sopenharmony_ci uint32_t i; 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { 823bf215546Sopenharmony_ci result = cmd_buffer_create(device, pool, pAllocateInfo->level, 824bf215546Sopenharmony_ci &pCommandBuffers[i]); 825bf215546Sopenharmony_ci if (result != VK_SUCCESS) 826bf215546Sopenharmony_ci break; 827bf215546Sopenharmony_ci } 828bf215546Sopenharmony_ci 829bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 830bf215546Sopenharmony_ci while (i--) { 831bf215546Sopenharmony_ci VK_FROM_HANDLE(vk_command_buffer, cmd_buffer, pCommandBuffers[i]); 832bf215546Sopenharmony_ci cmd_buffer_destroy(cmd_buffer); 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci for (i = 0; i < pAllocateInfo->commandBufferCount; i++) 835bf215546Sopenharmony_ci pCommandBuffers[i] = VK_NULL_HANDLE; 836bf215546Sopenharmony_ci } 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci return result; 839bf215546Sopenharmony_ci} 840bf215546Sopenharmony_ci 841bf215546Sopenharmony_cistatic void 842bf215546Sopenharmony_cicmd_buffer_subpass_handle_pending_resolves(struct v3dv_cmd_buffer *cmd_buffer) 843bf215546Sopenharmony_ci{ 844bf215546Sopenharmony_ci assert(cmd_buffer->state.subpass_idx < cmd_buffer->state.pass->subpass_count); 845bf215546Sopenharmony_ci const struct v3dv_render_pass *pass = cmd_buffer->state.pass; 846bf215546Sopenharmony_ci const struct v3dv_subpass *subpass = 847bf215546Sopenharmony_ci &pass->subpasses[cmd_buffer->state.subpass_idx]; 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci if (!subpass->resolve_attachments) 850bf215546Sopenharmony_ci return; 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci /* At this point we have already ended the current subpass and now we are 853bf215546Sopenharmony_ci * about to emit vkCmdResolveImage calls to get the resolves we can't handle 854bf215546Sopenharmony_ci * handle in the subpass RCL. 855bf215546Sopenharmony_ci * 856bf215546Sopenharmony_ci * vkCmdResolveImage is not supposed to be called inside a render pass so 857bf215546Sopenharmony_ci * before we call that we need to make sure our command buffer state reflects 858bf215546Sopenharmony_ci * that we are no longer in a subpass by finishing the current job and 859bf215546Sopenharmony_ci * resetting the framebuffer and render pass state temporarily and then 860bf215546Sopenharmony_ci * restoring it after we are done with the resolves. 861bf215546Sopenharmony_ci */ 862bf215546Sopenharmony_ci if (cmd_buffer->state.job) 863bf215546Sopenharmony_ci v3dv_cmd_buffer_finish_job(cmd_buffer); 864bf215546Sopenharmony_ci struct v3dv_framebuffer *restore_fb = cmd_buffer->state.framebuffer; 865bf215546Sopenharmony_ci struct v3dv_render_pass *restore_pass = cmd_buffer->state.pass; 866bf215546Sopenharmony_ci uint32_t restore_subpass_idx = cmd_buffer->state.subpass_idx; 867bf215546Sopenharmony_ci cmd_buffer->state.framebuffer = NULL; 868bf215546Sopenharmony_ci cmd_buffer->state.pass = NULL; 869bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx = -1; 870bf215546Sopenharmony_ci 871bf215546Sopenharmony_ci VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer); 872bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; i++) { 873bf215546Sopenharmony_ci const uint32_t src_attachment_idx = 874bf215546Sopenharmony_ci subpass->color_attachments[i].attachment; 875bf215546Sopenharmony_ci if (src_attachment_idx == VK_ATTACHMENT_UNUSED) 876bf215546Sopenharmony_ci continue; 877bf215546Sopenharmony_ci 878bf215546Sopenharmony_ci /* Skip if this attachment doesn't have a resolve or if it was already 879bf215546Sopenharmony_ci * implemented as a TLB resolve. 880bf215546Sopenharmony_ci */ 881bf215546Sopenharmony_ci if (!cmd_buffer->state.attachments[src_attachment_idx].has_resolve || 882bf215546Sopenharmony_ci cmd_buffer->state.attachments[src_attachment_idx].use_tlb_resolve) { 883bf215546Sopenharmony_ci continue; 884bf215546Sopenharmony_ci } 885bf215546Sopenharmony_ci 886bf215546Sopenharmony_ci const uint32_t dst_attachment_idx = 887bf215546Sopenharmony_ci subpass->resolve_attachments[i].attachment; 888bf215546Sopenharmony_ci assert(dst_attachment_idx != VK_ATTACHMENT_UNUSED); 889bf215546Sopenharmony_ci 890bf215546Sopenharmony_ci struct v3dv_image_view *src_iview = 891bf215546Sopenharmony_ci cmd_buffer->state.attachments[src_attachment_idx].image_view; 892bf215546Sopenharmony_ci struct v3dv_image_view *dst_iview = 893bf215546Sopenharmony_ci cmd_buffer->state.attachments[dst_attachment_idx].image_view; 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_ci VkImageResolve2 region = { 896bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2, 897bf215546Sopenharmony_ci .srcSubresource = { 898bf215546Sopenharmony_ci VK_IMAGE_ASPECT_COLOR_BIT, 899bf215546Sopenharmony_ci src_iview->vk.base_mip_level, 900bf215546Sopenharmony_ci src_iview->vk.base_array_layer, 901bf215546Sopenharmony_ci src_iview->vk.layer_count, 902bf215546Sopenharmony_ci }, 903bf215546Sopenharmony_ci .srcOffset = { 0, 0, 0 }, 904bf215546Sopenharmony_ci .dstSubresource = { 905bf215546Sopenharmony_ci VK_IMAGE_ASPECT_COLOR_BIT, 906bf215546Sopenharmony_ci dst_iview->vk.base_mip_level, 907bf215546Sopenharmony_ci dst_iview->vk.base_array_layer, 908bf215546Sopenharmony_ci dst_iview->vk.layer_count, 909bf215546Sopenharmony_ci }, 910bf215546Sopenharmony_ci .dstOffset = { 0, 0, 0 }, 911bf215546Sopenharmony_ci .extent = src_iview->vk.image->extent, 912bf215546Sopenharmony_ci }; 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci struct v3dv_image *src_image = (struct v3dv_image *) src_iview->vk.image; 915bf215546Sopenharmony_ci struct v3dv_image *dst_image = (struct v3dv_image *) dst_iview->vk.image; 916bf215546Sopenharmony_ci VkResolveImageInfo2 resolve_info = { 917bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_RESOLVE_IMAGE_INFO_2, 918bf215546Sopenharmony_ci .srcImage = v3dv_image_to_handle(src_image), 919bf215546Sopenharmony_ci .srcImageLayout = VK_IMAGE_LAYOUT_GENERAL, 920bf215546Sopenharmony_ci .dstImage = v3dv_image_to_handle(dst_image), 921bf215546Sopenharmony_ci .dstImageLayout = VK_IMAGE_LAYOUT_GENERAL, 922bf215546Sopenharmony_ci .regionCount = 1, 923bf215546Sopenharmony_ci .pRegions = ®ion, 924bf215546Sopenharmony_ci }; 925bf215546Sopenharmony_ci v3dv_CmdResolveImage2KHR(cmd_buffer_handle, &resolve_info); 926bf215546Sopenharmony_ci } 927bf215546Sopenharmony_ci 928bf215546Sopenharmony_ci cmd_buffer->state.framebuffer = restore_fb; 929bf215546Sopenharmony_ci cmd_buffer->state.pass = restore_pass; 930bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx = restore_subpass_idx; 931bf215546Sopenharmony_ci} 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_cistatic VkResult 934bf215546Sopenharmony_cicmd_buffer_begin_render_pass_secondary( 935bf215546Sopenharmony_ci struct v3dv_cmd_buffer *cmd_buffer, 936bf215546Sopenharmony_ci const VkCommandBufferInheritanceInfo *inheritance_info) 937bf215546Sopenharmony_ci{ 938bf215546Sopenharmony_ci assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); 939bf215546Sopenharmony_ci assert(cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT); 940bf215546Sopenharmony_ci assert(inheritance_info); 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci cmd_buffer->state.pass = 943bf215546Sopenharmony_ci v3dv_render_pass_from_handle(inheritance_info->renderPass); 944bf215546Sopenharmony_ci assert(cmd_buffer->state.pass); 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_ci cmd_buffer->state.framebuffer = 947bf215546Sopenharmony_ci v3dv_framebuffer_from_handle(inheritance_info->framebuffer); 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci assert(inheritance_info->subpass < cmd_buffer->state.pass->subpass_count); 950bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx = inheritance_info->subpass; 951bf215546Sopenharmony_ci 952bf215546Sopenharmony_ci cmd_buffer->state.inheritance.occlusion_query_enable = 953bf215546Sopenharmony_ci inheritance_info->occlusionQueryEnable; 954bf215546Sopenharmony_ci 955bf215546Sopenharmony_ci /* Secondaries that execute inside a render pass won't start subpasses 956bf215546Sopenharmony_ci * so we want to create a job for them here. 957bf215546Sopenharmony_ci */ 958bf215546Sopenharmony_ci struct v3dv_job *job = 959bf215546Sopenharmony_ci v3dv_cmd_buffer_start_job(cmd_buffer, inheritance_info->subpass, 960bf215546Sopenharmony_ci V3DV_JOB_TYPE_GPU_CL_SECONDARY); 961bf215546Sopenharmony_ci if (!job) { 962bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 963bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 964bf215546Sopenharmony_ci } 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci /* Secondary command buffers don't know about the render area, but our 967bf215546Sopenharmony_ci * scissor setup accounts for it, so let's make sure we make it large 968bf215546Sopenharmony_ci * enough that it doesn't actually constrain any rendering. This should 969bf215546Sopenharmony_ci * be fine, since the Vulkan spec states: 970bf215546Sopenharmony_ci * 971bf215546Sopenharmony_ci * "The application must ensure (using scissor if necessary) that all 972bf215546Sopenharmony_ci * rendering is contained within the render area." 973bf215546Sopenharmony_ci */ 974bf215546Sopenharmony_ci const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer; 975bf215546Sopenharmony_ci cmd_buffer->state.render_area.offset.x = 0; 976bf215546Sopenharmony_ci cmd_buffer->state.render_area.offset.y = 0; 977bf215546Sopenharmony_ci cmd_buffer->state.render_area.extent.width = 978bf215546Sopenharmony_ci framebuffer ? framebuffer->width : V3D_MAX_IMAGE_DIMENSION; 979bf215546Sopenharmony_ci cmd_buffer->state.render_area.extent.height = 980bf215546Sopenharmony_ci framebuffer ? framebuffer->height : V3D_MAX_IMAGE_DIMENSION; 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_ci return VK_SUCCESS; 983bf215546Sopenharmony_ci} 984bf215546Sopenharmony_ci 985bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 986bf215546Sopenharmony_civ3dv_BeginCommandBuffer(VkCommandBuffer commandBuffer, 987bf215546Sopenharmony_ci const VkCommandBufferBeginInfo *pBeginInfo) 988bf215546Sopenharmony_ci{ 989bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci /* If this is the first vkBeginCommandBuffer, we must initialize the 992bf215546Sopenharmony_ci * command buffer's state. Otherwise, we must reset its state. In both 993bf215546Sopenharmony_ci * cases we reset it. 994bf215546Sopenharmony_ci */ 995bf215546Sopenharmony_ci VkResult result = cmd_buffer_reset(cmd_buffer, 0); 996bf215546Sopenharmony_ci if (result != VK_SUCCESS) 997bf215546Sopenharmony_ci return result; 998bf215546Sopenharmony_ci 999bf215546Sopenharmony_ci assert(cmd_buffer->status == V3DV_CMD_BUFFER_STATUS_INITIALIZED); 1000bf215546Sopenharmony_ci 1001bf215546Sopenharmony_ci cmd_buffer->usage_flags = pBeginInfo->flags; 1002bf215546Sopenharmony_ci 1003bf215546Sopenharmony_ci if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY) { 1004bf215546Sopenharmony_ci if (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT) { 1005bf215546Sopenharmony_ci result = 1006bf215546Sopenharmony_ci cmd_buffer_begin_render_pass_secondary(cmd_buffer, 1007bf215546Sopenharmony_ci pBeginInfo->pInheritanceInfo); 1008bf215546Sopenharmony_ci if (result != VK_SUCCESS) 1009bf215546Sopenharmony_ci return result; 1010bf215546Sopenharmony_ci } 1011bf215546Sopenharmony_ci } 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_ci cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_RECORDING; 1014bf215546Sopenharmony_ci 1015bf215546Sopenharmony_ci return VK_SUCCESS; 1016bf215546Sopenharmony_ci} 1017bf215546Sopenharmony_ci 1018bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 1019bf215546Sopenharmony_civ3dv_ResetCommandBuffer(VkCommandBuffer commandBuffer, 1020bf215546Sopenharmony_ci VkCommandBufferResetFlags flags) 1021bf215546Sopenharmony_ci{ 1022bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 1023bf215546Sopenharmony_ci return cmd_buffer_reset(cmd_buffer, flags); 1024bf215546Sopenharmony_ci} 1025bf215546Sopenharmony_ci 1026bf215546Sopenharmony_cistatic void 1027bf215546Sopenharmony_cicmd_buffer_update_tile_alignment(struct v3dv_cmd_buffer *cmd_buffer) 1028bf215546Sopenharmony_ci{ 1029bf215546Sopenharmony_ci /* Render areas and scissor/viewport are only relevant inside render passes, 1030bf215546Sopenharmony_ci * otherwise we are dealing with transfer operations where these elements 1031bf215546Sopenharmony_ci * don't apply. 1032bf215546Sopenharmony_ci */ 1033bf215546Sopenharmony_ci assert(cmd_buffer->state.pass); 1034bf215546Sopenharmony_ci const VkRect2D *rect = &cmd_buffer->state.render_area; 1035bf215546Sopenharmony_ci 1036bf215546Sopenharmony_ci /* We should only call this at the beginning of a subpass so we should 1037bf215546Sopenharmony_ci * always have framebuffer information available. 1038bf215546Sopenharmony_ci */ 1039bf215546Sopenharmony_ci assert(cmd_buffer->state.framebuffer); 1040bf215546Sopenharmony_ci cmd_buffer->state.tile_aligned_render_area = 1041bf215546Sopenharmony_ci v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, rect, 1042bf215546Sopenharmony_ci cmd_buffer->state.framebuffer, 1043bf215546Sopenharmony_ci cmd_buffer->state.pass, 1044bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx); 1045bf215546Sopenharmony_ci 1046bf215546Sopenharmony_ci if (!cmd_buffer->state.tile_aligned_render_area) { 1047bf215546Sopenharmony_ci perf_debug("Render area for subpass %d of render pass %p doesn't " 1048bf215546Sopenharmony_ci "match render pass granularity.\n", 1049bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx, cmd_buffer->state.pass); 1050bf215546Sopenharmony_ci } 1051bf215546Sopenharmony_ci} 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_cistatic void 1054bf215546Sopenharmony_cicmd_buffer_update_attachment_resolve_state(struct v3dv_cmd_buffer *cmd_buffer) 1055bf215546Sopenharmony_ci{ 1056bf215546Sopenharmony_ci /* NOTE: This should be called after cmd_buffer_update_tile_alignment() 1057bf215546Sopenharmony_ci * since it relies on up-to-date information about subpass tile alignment. 1058bf215546Sopenharmony_ci */ 1059bf215546Sopenharmony_ci const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1060bf215546Sopenharmony_ci const struct v3dv_render_pass *pass = state->pass; 1061bf215546Sopenharmony_ci const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; i++) { 1064bf215546Sopenharmony_ci const uint32_t attachment_idx = subpass->color_attachments[i].attachment; 1065bf215546Sopenharmony_ci if (attachment_idx == VK_ATTACHMENT_UNUSED) 1066bf215546Sopenharmony_ci continue; 1067bf215546Sopenharmony_ci 1068bf215546Sopenharmony_ci state->attachments[attachment_idx].has_resolve = 1069bf215546Sopenharmony_ci subpass->resolve_attachments && 1070bf215546Sopenharmony_ci subpass->resolve_attachments[i].attachment != VK_ATTACHMENT_UNUSED; 1071bf215546Sopenharmony_ci 1072bf215546Sopenharmony_ci state->attachments[attachment_idx].use_tlb_resolve = 1073bf215546Sopenharmony_ci state->attachments[attachment_idx].has_resolve && 1074bf215546Sopenharmony_ci state->tile_aligned_render_area && 1075bf215546Sopenharmony_ci pass->attachments[attachment_idx].try_tlb_resolve; 1076bf215546Sopenharmony_ci } 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; 1079bf215546Sopenharmony_ci if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { 1080bf215546Sopenharmony_ci uint32_t ds_resolve_attachment_idx = 1081bf215546Sopenharmony_ci subpass->ds_resolve_attachment.attachment; 1082bf215546Sopenharmony_ci state->attachments[ds_attachment_idx].has_resolve = 1083bf215546Sopenharmony_ci ds_resolve_attachment_idx != VK_ATTACHMENT_UNUSED; 1084bf215546Sopenharmony_ci 1085bf215546Sopenharmony_ci assert(!state->attachments[ds_attachment_idx].has_resolve || 1086bf215546Sopenharmony_ci (subpass->resolve_depth || subpass->resolve_stencil)); 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci state->attachments[ds_attachment_idx].use_tlb_resolve = 1089bf215546Sopenharmony_ci state->attachments[ds_attachment_idx].has_resolve && 1090bf215546Sopenharmony_ci state->tile_aligned_render_area && 1091bf215546Sopenharmony_ci pass->attachments[ds_attachment_idx].try_tlb_resolve; 1092bf215546Sopenharmony_ci } 1093bf215546Sopenharmony_ci} 1094bf215546Sopenharmony_ci 1095bf215546Sopenharmony_cistatic void 1096bf215546Sopenharmony_cicmd_buffer_state_set_attachment_clear_color(struct v3dv_cmd_buffer *cmd_buffer, 1097bf215546Sopenharmony_ci uint32_t attachment_idx, 1098bf215546Sopenharmony_ci const VkClearColorValue *color) 1099bf215546Sopenharmony_ci{ 1100bf215546Sopenharmony_ci assert(attachment_idx < cmd_buffer->state.pass->attachment_count); 1101bf215546Sopenharmony_ci 1102bf215546Sopenharmony_ci const struct v3dv_render_pass_attachment *attachment = 1103bf215546Sopenharmony_ci &cmd_buffer->state.pass->attachments[attachment_idx]; 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci uint32_t internal_type, internal_bpp; 1106bf215546Sopenharmony_ci const struct v3dv_format *format = 1107bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, get_format)(attachment->desc.format); 1108bf215546Sopenharmony_ci 1109bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_output_format) 1110bf215546Sopenharmony_ci (format->rt_type, &internal_type, &internal_bpp); 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci uint32_t internal_size = 4 << internal_bpp; 1113bf215546Sopenharmony_ci 1114bf215546Sopenharmony_ci struct v3dv_cmd_buffer_attachment_state *attachment_state = 1115bf215546Sopenharmony_ci &cmd_buffer->state.attachments[attachment_idx]; 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, get_hw_clear_color) 1118bf215546Sopenharmony_ci (color, internal_type, internal_size, &attachment_state->clear_value.color[0]); 1119bf215546Sopenharmony_ci 1120bf215546Sopenharmony_ci attachment_state->vk_clear_value.color = *color; 1121bf215546Sopenharmony_ci} 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_cistatic void 1124bf215546Sopenharmony_cicmd_buffer_state_set_attachment_clear_depth_stencil( 1125bf215546Sopenharmony_ci struct v3dv_cmd_buffer *cmd_buffer, 1126bf215546Sopenharmony_ci uint32_t attachment_idx, 1127bf215546Sopenharmony_ci bool clear_depth, bool clear_stencil, 1128bf215546Sopenharmony_ci const VkClearDepthStencilValue *ds) 1129bf215546Sopenharmony_ci{ 1130bf215546Sopenharmony_ci struct v3dv_cmd_buffer_attachment_state *attachment_state = 1131bf215546Sopenharmony_ci &cmd_buffer->state.attachments[attachment_idx]; 1132bf215546Sopenharmony_ci 1133bf215546Sopenharmony_ci if (clear_depth) 1134bf215546Sopenharmony_ci attachment_state->clear_value.z = ds->depth; 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_ci if (clear_stencil) 1137bf215546Sopenharmony_ci attachment_state->clear_value.s = ds->stencil; 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_ci attachment_state->vk_clear_value.depthStencil = *ds; 1140bf215546Sopenharmony_ci} 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_cistatic void 1143bf215546Sopenharmony_cicmd_buffer_state_set_clear_values(struct v3dv_cmd_buffer *cmd_buffer, 1144bf215546Sopenharmony_ci uint32_t count, const VkClearValue *values) 1145bf215546Sopenharmony_ci{ 1146bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1147bf215546Sopenharmony_ci const struct v3dv_render_pass *pass = state->pass; 1148bf215546Sopenharmony_ci 1149bf215546Sopenharmony_ci /* There could be less clear values than attachments in the render pass, in 1150bf215546Sopenharmony_ci * which case we only want to process as many as we have, or there could be 1151bf215546Sopenharmony_ci * more, in which case we want to ignore those for which we don't have a 1152bf215546Sopenharmony_ci * corresponding attachment. 1153bf215546Sopenharmony_ci */ 1154bf215546Sopenharmony_ci count = MIN2(count, pass->attachment_count); 1155bf215546Sopenharmony_ci for (uint32_t i = 0; i < count; i++) { 1156bf215546Sopenharmony_ci const struct v3dv_render_pass_attachment *attachment = 1157bf215546Sopenharmony_ci &pass->attachments[i]; 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_ci if (attachment->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR) 1160bf215546Sopenharmony_ci continue; 1161bf215546Sopenharmony_ci 1162bf215546Sopenharmony_ci VkImageAspectFlags aspects = vk_format_aspects(attachment->desc.format); 1163bf215546Sopenharmony_ci if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) { 1164bf215546Sopenharmony_ci cmd_buffer_state_set_attachment_clear_color(cmd_buffer, i, 1165bf215546Sopenharmony_ci &values[i].color); 1166bf215546Sopenharmony_ci } else if (aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | 1167bf215546Sopenharmony_ci VK_IMAGE_ASPECT_STENCIL_BIT)) { 1168bf215546Sopenharmony_ci cmd_buffer_state_set_attachment_clear_depth_stencil( 1169bf215546Sopenharmony_ci cmd_buffer, i, 1170bf215546Sopenharmony_ci aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 1171bf215546Sopenharmony_ci aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 1172bf215546Sopenharmony_ci &values[i].depthStencil); 1173bf215546Sopenharmony_ci } 1174bf215546Sopenharmony_ci } 1175bf215546Sopenharmony_ci} 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_cistatic void 1178bf215546Sopenharmony_cicmd_buffer_state_set_attachments(struct v3dv_cmd_buffer *cmd_buffer, 1179bf215546Sopenharmony_ci const VkRenderPassBeginInfo *pRenderPassBegin) 1180bf215546Sopenharmony_ci{ 1181bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_render_pass, pass, pRenderPassBegin->renderPass); 1182bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); 1183bf215546Sopenharmony_ci 1184bf215546Sopenharmony_ci const VkRenderPassAttachmentBeginInfo *attach_begin = 1185bf215546Sopenharmony_ci vk_find_struct_const(pRenderPassBegin, RENDER_PASS_ATTACHMENT_BEGIN_INFO); 1186bf215546Sopenharmony_ci 1187bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1188bf215546Sopenharmony_ci 1189bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->attachment_count; i++) { 1190bf215546Sopenharmony_ci if (attach_begin && attach_begin->attachmentCount != 0) { 1191bf215546Sopenharmony_ci state->attachments[i].image_view = 1192bf215546Sopenharmony_ci v3dv_image_view_from_handle(attach_begin->pAttachments[i]); 1193bf215546Sopenharmony_ci } else if (framebuffer) { 1194bf215546Sopenharmony_ci state->attachments[i].image_view = framebuffer->attachments[i]; 1195bf215546Sopenharmony_ci } else { 1196bf215546Sopenharmony_ci assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); 1197bf215546Sopenharmony_ci state->attachments[i].image_view = NULL; 1198bf215546Sopenharmony_ci } 1199bf215546Sopenharmony_ci } 1200bf215546Sopenharmony_ci} 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_cistatic void 1203bf215546Sopenharmony_cicmd_buffer_init_render_pass_attachment_state(struct v3dv_cmd_buffer *cmd_buffer, 1204bf215546Sopenharmony_ci const VkRenderPassBeginInfo *pRenderPassBegin) 1205bf215546Sopenharmony_ci{ 1206bf215546Sopenharmony_ci cmd_buffer_state_set_clear_values(cmd_buffer, 1207bf215546Sopenharmony_ci pRenderPassBegin->clearValueCount, 1208bf215546Sopenharmony_ci pRenderPassBegin->pClearValues); 1209bf215546Sopenharmony_ci 1210bf215546Sopenharmony_ci cmd_buffer_state_set_attachments(cmd_buffer, pRenderPassBegin); 1211bf215546Sopenharmony_ci} 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_cistatic void 1214bf215546Sopenharmony_cicmd_buffer_ensure_render_pass_attachment_state(struct v3dv_cmd_buffer *cmd_buffer) 1215bf215546Sopenharmony_ci{ 1216bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1217bf215546Sopenharmony_ci const struct v3dv_render_pass *pass = state->pass; 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci if (state->attachment_alloc_count < pass->attachment_count) { 1220bf215546Sopenharmony_ci if (state->attachments > 0) { 1221bf215546Sopenharmony_ci assert(state->attachment_alloc_count > 0); 1222bf215546Sopenharmony_ci vk_free(&cmd_buffer->device->vk.alloc, state->attachments); 1223bf215546Sopenharmony_ci } 1224bf215546Sopenharmony_ci 1225bf215546Sopenharmony_ci uint32_t size = sizeof(struct v3dv_cmd_buffer_attachment_state) * 1226bf215546Sopenharmony_ci pass->attachment_count; 1227bf215546Sopenharmony_ci state->attachments = vk_zalloc(&cmd_buffer->device->vk.alloc, size, 8, 1228bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1229bf215546Sopenharmony_ci if (!state->attachments) { 1230bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 1231bf215546Sopenharmony_ci return; 1232bf215546Sopenharmony_ci } 1233bf215546Sopenharmony_ci state->attachment_alloc_count = pass->attachment_count; 1234bf215546Sopenharmony_ci } 1235bf215546Sopenharmony_ci 1236bf215546Sopenharmony_ci assert(state->attachment_alloc_count >= pass->attachment_count); 1237bf215546Sopenharmony_ci} 1238bf215546Sopenharmony_ci 1239bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1240bf215546Sopenharmony_civ3dv_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, 1241bf215546Sopenharmony_ci const VkRenderPassBeginInfo *pRenderPassBegin, 1242bf215546Sopenharmony_ci const VkSubpassBeginInfo *pSubpassBeginInfo) 1243bf215546Sopenharmony_ci{ 1244bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 1245bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_render_pass, pass, pRenderPassBegin->renderPass); 1246bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_framebuffer, framebuffer, pRenderPassBegin->framebuffer); 1247bf215546Sopenharmony_ci 1248bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1249bf215546Sopenharmony_ci state->pass = pass; 1250bf215546Sopenharmony_ci state->framebuffer = framebuffer; 1251bf215546Sopenharmony_ci 1252bf215546Sopenharmony_ci cmd_buffer_ensure_render_pass_attachment_state(cmd_buffer); 1253bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_ci cmd_buffer_init_render_pass_attachment_state(cmd_buffer, pRenderPassBegin); 1256bf215546Sopenharmony_ci 1257bf215546Sopenharmony_ci state->render_area = pRenderPassBegin->renderArea; 1258bf215546Sopenharmony_ci 1259bf215546Sopenharmony_ci /* If our render area is smaller than the current clip window we will have 1260bf215546Sopenharmony_ci * to emit a new clip window to constraint it to the render area. 1261bf215546Sopenharmony_ci */ 1262bf215546Sopenharmony_ci uint32_t min_render_x = state->render_area.offset.x; 1263bf215546Sopenharmony_ci uint32_t min_render_y = state->render_area.offset.y; 1264bf215546Sopenharmony_ci uint32_t max_render_x = min_render_x + state->render_area.extent.width - 1; 1265bf215546Sopenharmony_ci uint32_t max_render_y = min_render_y + state->render_area.extent.height - 1; 1266bf215546Sopenharmony_ci uint32_t min_clip_x = state->clip_window.offset.x; 1267bf215546Sopenharmony_ci uint32_t min_clip_y = state->clip_window.offset.y; 1268bf215546Sopenharmony_ci uint32_t max_clip_x = min_clip_x + state->clip_window.extent.width - 1; 1269bf215546Sopenharmony_ci uint32_t max_clip_y = min_clip_y + state->clip_window.extent.height - 1; 1270bf215546Sopenharmony_ci if (min_render_x > min_clip_x || min_render_y > min_clip_y || 1271bf215546Sopenharmony_ci max_render_x < max_clip_x || max_render_y < max_clip_y) { 1272bf215546Sopenharmony_ci state->dirty |= V3DV_CMD_DIRTY_SCISSOR; 1273bf215546Sopenharmony_ci } 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci /* Setup for first subpass */ 1276bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_start(cmd_buffer, 0); 1277bf215546Sopenharmony_ci} 1278bf215546Sopenharmony_ci 1279bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1280bf215546Sopenharmony_civ3dv_CmdNextSubpass2(VkCommandBuffer commandBuffer, 1281bf215546Sopenharmony_ci const VkSubpassBeginInfo *pSubpassBeginInfo, 1282bf215546Sopenharmony_ci const VkSubpassEndInfo *pSubpassEndInfo) 1283bf215546Sopenharmony_ci{ 1284bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 1285bf215546Sopenharmony_ci 1286bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1287bf215546Sopenharmony_ci assert(state->subpass_idx < state->pass->subpass_count - 1); 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci /* Finish the previous subpass */ 1290bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_finish(cmd_buffer); 1291bf215546Sopenharmony_ci cmd_buffer_subpass_handle_pending_resolves(cmd_buffer); 1292bf215546Sopenharmony_ci 1293bf215546Sopenharmony_ci /* Start the next subpass */ 1294bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_start(cmd_buffer, state->subpass_idx + 1); 1295bf215546Sopenharmony_ci} 1296bf215546Sopenharmony_ci 1297bf215546Sopenharmony_cistatic void 1298bf215546Sopenharmony_cicmd_buffer_emit_subpass_clears(struct v3dv_cmd_buffer *cmd_buffer) 1299bf215546Sopenharmony_ci{ 1300bf215546Sopenharmony_ci assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); 1301bf215546Sopenharmony_ci 1302bf215546Sopenharmony_ci assert(cmd_buffer->state.pass); 1303bf215546Sopenharmony_ci assert(cmd_buffer->state.subpass_idx < cmd_buffer->state.pass->subpass_count); 1304bf215546Sopenharmony_ci const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1305bf215546Sopenharmony_ci const struct v3dv_render_pass *pass = state->pass; 1306bf215546Sopenharmony_ci const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; 1307bf215546Sopenharmony_ci 1308bf215546Sopenharmony_ci /* We only need to emit subpass clears as draw calls when the render 1309bf215546Sopenharmony_ci * area is not aligned to tile boundaries or for GFXH-1461. 1310bf215546Sopenharmony_ci */ 1311bf215546Sopenharmony_ci if (cmd_buffer->state.tile_aligned_render_area && 1312bf215546Sopenharmony_ci !subpass->do_depth_clear_with_draw && 1313bf215546Sopenharmony_ci !subpass->do_depth_clear_with_draw) { 1314bf215546Sopenharmony_ci return; 1315bf215546Sopenharmony_ci } 1316bf215546Sopenharmony_ci 1317bf215546Sopenharmony_ci uint32_t att_count = 0; 1318bf215546Sopenharmony_ci VkClearAttachment atts[V3D_MAX_DRAW_BUFFERS + 1]; /* 4 color + D/S */ 1319bf215546Sopenharmony_ci 1320bf215546Sopenharmony_ci /* We only need to emit subpass clears as draw calls for color attachments 1321bf215546Sopenharmony_ci * if the render area is not aligned to tile boundaries. 1322bf215546Sopenharmony_ci */ 1323bf215546Sopenharmony_ci if (!cmd_buffer->state.tile_aligned_render_area) { 1324bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; i++) { 1325bf215546Sopenharmony_ci const uint32_t att_idx = subpass->color_attachments[i].attachment; 1326bf215546Sopenharmony_ci if (att_idx == VK_ATTACHMENT_UNUSED) 1327bf215546Sopenharmony_ci continue; 1328bf215546Sopenharmony_ci 1329bf215546Sopenharmony_ci struct v3dv_render_pass_attachment *att = &pass->attachments[att_idx]; 1330bf215546Sopenharmony_ci if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR) 1331bf215546Sopenharmony_ci continue; 1332bf215546Sopenharmony_ci 1333bf215546Sopenharmony_ci if (state->subpass_idx != att->first_subpass) 1334bf215546Sopenharmony_ci continue; 1335bf215546Sopenharmony_ci 1336bf215546Sopenharmony_ci atts[att_count].aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; 1337bf215546Sopenharmony_ci atts[att_count].colorAttachment = i; 1338bf215546Sopenharmony_ci atts[att_count].clearValue = state->attachments[att_idx].vk_clear_value; 1339bf215546Sopenharmony_ci att_count++; 1340bf215546Sopenharmony_ci } 1341bf215546Sopenharmony_ci } 1342bf215546Sopenharmony_ci 1343bf215546Sopenharmony_ci /* For D/S we may also need to emit a subpass clear for GFXH-1461 */ 1344bf215546Sopenharmony_ci const uint32_t ds_att_idx = subpass->ds_attachment.attachment; 1345bf215546Sopenharmony_ci if (ds_att_idx != VK_ATTACHMENT_UNUSED) { 1346bf215546Sopenharmony_ci struct v3dv_render_pass_attachment *att = &pass->attachments[ds_att_idx]; 1347bf215546Sopenharmony_ci if (state->subpass_idx == att->first_subpass) { 1348bf215546Sopenharmony_ci VkImageAspectFlags aspects = vk_format_aspects(att->desc.format); 1349bf215546Sopenharmony_ci if (att->desc.loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR || 1350bf215546Sopenharmony_ci (cmd_buffer->state.tile_aligned_render_area && 1351bf215546Sopenharmony_ci !subpass->do_depth_clear_with_draw)) { 1352bf215546Sopenharmony_ci aspects &= ~VK_IMAGE_ASPECT_DEPTH_BIT; 1353bf215546Sopenharmony_ci } 1354bf215546Sopenharmony_ci if (att->desc.stencilLoadOp != VK_ATTACHMENT_LOAD_OP_CLEAR || 1355bf215546Sopenharmony_ci (cmd_buffer->state.tile_aligned_render_area && 1356bf215546Sopenharmony_ci !subpass->do_stencil_clear_with_draw)) { 1357bf215546Sopenharmony_ci aspects &= ~VK_IMAGE_ASPECT_STENCIL_BIT; 1358bf215546Sopenharmony_ci } 1359bf215546Sopenharmony_ci if (aspects) { 1360bf215546Sopenharmony_ci atts[att_count].aspectMask = aspects; 1361bf215546Sopenharmony_ci atts[att_count].colorAttachment = 0; /* Ignored */ 1362bf215546Sopenharmony_ci atts[att_count].clearValue = 1363bf215546Sopenharmony_ci state->attachments[ds_att_idx].vk_clear_value; 1364bf215546Sopenharmony_ci att_count++; 1365bf215546Sopenharmony_ci } 1366bf215546Sopenharmony_ci } 1367bf215546Sopenharmony_ci } 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_ci if (att_count == 0) 1370bf215546Sopenharmony_ci return; 1371bf215546Sopenharmony_ci 1372bf215546Sopenharmony_ci if (!cmd_buffer->state.tile_aligned_render_area) { 1373bf215546Sopenharmony_ci perf_debug("Render area doesn't match render pass granularity, falling " 1374bf215546Sopenharmony_ci "back to vkCmdClearAttachments for " 1375bf215546Sopenharmony_ci "VK_ATTACHMENT_LOAD_OP_CLEAR.\n"); 1376bf215546Sopenharmony_ci } else if (subpass->do_depth_clear_with_draw || 1377bf215546Sopenharmony_ci subpass->do_stencil_clear_with_draw) { 1378bf215546Sopenharmony_ci perf_debug("Subpass clears DEPTH but loads STENCIL (or viceversa), " 1379bf215546Sopenharmony_ci "falling back to vkCmdClearAttachments for " 1380bf215546Sopenharmony_ci "VK_ATTACHMENT_LOAD_OP_CLEAR.\n"); 1381bf215546Sopenharmony_ci } 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci /* From the Vulkan 1.0 spec: 1384bf215546Sopenharmony_ci * 1385bf215546Sopenharmony_ci * "VK_ATTACHMENT_LOAD_OP_CLEAR specifies that the contents within the 1386bf215546Sopenharmony_ci * render area will be cleared to a uniform value, which is specified 1387bf215546Sopenharmony_ci * when a render pass instance is begun." 1388bf215546Sopenharmony_ci * 1389bf215546Sopenharmony_ci * So the clear is only constrained by the render area and not by pipeline 1390bf215546Sopenharmony_ci * state such as scissor or viewport, these are the semantics of 1391bf215546Sopenharmony_ci * vkCmdClearAttachments as well. 1392bf215546Sopenharmony_ci */ 1393bf215546Sopenharmony_ci VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer); 1394bf215546Sopenharmony_ci VkClearRect rect = { 1395bf215546Sopenharmony_ci .rect = state->render_area, 1396bf215546Sopenharmony_ci .baseArrayLayer = 0, 1397bf215546Sopenharmony_ci .layerCount = 1, 1398bf215546Sopenharmony_ci }; 1399bf215546Sopenharmony_ci v3dv_CmdClearAttachments(_cmd_buffer, att_count, atts, 1, &rect); 1400bf215546Sopenharmony_ci} 1401bf215546Sopenharmony_ci 1402bf215546Sopenharmony_cistatic struct v3dv_job * 1403bf215546Sopenharmony_cicmd_buffer_subpass_create_job(struct v3dv_cmd_buffer *cmd_buffer, 1404bf215546Sopenharmony_ci uint32_t subpass_idx, 1405bf215546Sopenharmony_ci enum v3dv_job_type type) 1406bf215546Sopenharmony_ci{ 1407bf215546Sopenharmony_ci assert(type == V3DV_JOB_TYPE_GPU_CL || 1408bf215546Sopenharmony_ci type == V3DV_JOB_TYPE_GPU_CL_SECONDARY); 1409bf215546Sopenharmony_ci 1410bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1411bf215546Sopenharmony_ci assert(subpass_idx < state->pass->subpass_count); 1412bf215546Sopenharmony_ci 1413bf215546Sopenharmony_ci /* Starting a new job can trigger a finish of the current one, so don't 1414bf215546Sopenharmony_ci * change the command buffer state for the new job until we are done creating 1415bf215546Sopenharmony_ci * the new job. 1416bf215546Sopenharmony_ci */ 1417bf215546Sopenharmony_ci struct v3dv_job *job = 1418bf215546Sopenharmony_ci v3dv_cmd_buffer_start_job(cmd_buffer, subpass_idx, type); 1419bf215546Sopenharmony_ci if (!job) 1420bf215546Sopenharmony_ci return NULL; 1421bf215546Sopenharmony_ci 1422bf215546Sopenharmony_ci state->subpass_idx = subpass_idx; 1423bf215546Sopenharmony_ci 1424bf215546Sopenharmony_ci /* If we are starting a new job we need to setup binning. We only do this 1425bf215546Sopenharmony_ci * for V3DV_JOB_TYPE_GPU_CL jobs because V3DV_JOB_TYPE_GPU_CL_SECONDARY 1426bf215546Sopenharmony_ci * jobs are not submitted to the GPU directly, and are instead meant to be 1427bf215546Sopenharmony_ci * branched to from other V3DV_JOB_TYPE_GPU_CL jobs. 1428bf215546Sopenharmony_ci */ 1429bf215546Sopenharmony_ci if (type == V3DV_JOB_TYPE_GPU_CL && 1430bf215546Sopenharmony_ci job->first_subpass == state->subpass_idx) { 1431bf215546Sopenharmony_ci const struct v3dv_subpass *subpass = 1432bf215546Sopenharmony_ci &state->pass->subpasses[state->subpass_idx]; 1433bf215546Sopenharmony_ci 1434bf215546Sopenharmony_ci const struct v3dv_framebuffer *framebuffer = state->framebuffer; 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_ci uint8_t internal_bpp; 1437bf215546Sopenharmony_ci bool msaa; 1438bf215546Sopenharmony_ci v3dv_X(job->device, framebuffer_compute_internal_bpp_msaa) 1439bf215546Sopenharmony_ci (framebuffer, state->attachments, subpass, &internal_bpp, &msaa); 1440bf215546Sopenharmony_ci 1441bf215546Sopenharmony_ci /* From the Vulkan spec: 1442bf215546Sopenharmony_ci * 1443bf215546Sopenharmony_ci * "If the render pass uses multiview, then layers must be one and 1444bf215546Sopenharmony_ci * each attachment requires a number of layers that is greater than 1445bf215546Sopenharmony_ci * the maximum bit index set in the view mask in the subpasses in 1446bf215546Sopenharmony_ci * which it is used." 1447bf215546Sopenharmony_ci * 1448bf215546Sopenharmony_ci * So when multiview is enabled, we take the number of layers from the 1449bf215546Sopenharmony_ci * last bit set in the view mask. 1450bf215546Sopenharmony_ci */ 1451bf215546Sopenharmony_ci uint32_t layers = framebuffer->layers; 1452bf215546Sopenharmony_ci if (subpass->view_mask != 0) { 1453bf215546Sopenharmony_ci assert(framebuffer->layers == 1); 1454bf215546Sopenharmony_ci layers = util_last_bit(subpass->view_mask); 1455bf215546Sopenharmony_ci } 1456bf215546Sopenharmony_ci 1457bf215546Sopenharmony_ci v3dv_job_start_frame(job, 1458bf215546Sopenharmony_ci framebuffer->width, 1459bf215546Sopenharmony_ci framebuffer->height, 1460bf215546Sopenharmony_ci layers, 1461bf215546Sopenharmony_ci true, 1462bf215546Sopenharmony_ci subpass->color_count, 1463bf215546Sopenharmony_ci internal_bpp, 1464bf215546Sopenharmony_ci msaa); 1465bf215546Sopenharmony_ci } 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_ci return job; 1468bf215546Sopenharmony_ci} 1469bf215546Sopenharmony_ci 1470bf215546Sopenharmony_cistruct v3dv_job * 1471bf215546Sopenharmony_civ3dv_cmd_buffer_subpass_start(struct v3dv_cmd_buffer *cmd_buffer, 1472bf215546Sopenharmony_ci uint32_t subpass_idx) 1473bf215546Sopenharmony_ci{ 1474bf215546Sopenharmony_ci assert(cmd_buffer->state.pass); 1475bf215546Sopenharmony_ci assert(subpass_idx < cmd_buffer->state.pass->subpass_count); 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci struct v3dv_job *job = 1478bf215546Sopenharmony_ci cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx, 1479bf215546Sopenharmony_ci V3DV_JOB_TYPE_GPU_CL); 1480bf215546Sopenharmony_ci if (!job) 1481bf215546Sopenharmony_ci return NULL; 1482bf215546Sopenharmony_ci 1483bf215546Sopenharmony_ci /* Check if our render area is aligned to tile boundaries. We have to do 1484bf215546Sopenharmony_ci * this in each subpass because the subset of attachments used can change 1485bf215546Sopenharmony_ci * and with that the tile size selected by the hardware can change too. 1486bf215546Sopenharmony_ci */ 1487bf215546Sopenharmony_ci cmd_buffer_update_tile_alignment(cmd_buffer); 1488bf215546Sopenharmony_ci 1489bf215546Sopenharmony_ci cmd_buffer_update_attachment_resolve_state(cmd_buffer); 1490bf215546Sopenharmony_ci 1491bf215546Sopenharmony_ci /* If we can't use TLB clears then we need to emit draw clears for any 1492bf215546Sopenharmony_ci * LOAD_OP_CLEAR attachments in this subpass now. We might also need to emit 1493bf215546Sopenharmony_ci * Depth/Stencil clears if we hit GFXH-1461. 1494bf215546Sopenharmony_ci * 1495bf215546Sopenharmony_ci * Secondary command buffers don't start subpasses (and may not even have 1496bf215546Sopenharmony_ci * framebuffer state), so we only care about this in primaries. The only 1497bf215546Sopenharmony_ci * exception could be a secondary runnning inside a subpass that needs to 1498bf215546Sopenharmony_ci * record a meta operation (with its own render pass) that relies on 1499bf215546Sopenharmony_ci * attachment load clears, but we don't have any instances of that right 1500bf215546Sopenharmony_ci * now. 1501bf215546Sopenharmony_ci */ 1502bf215546Sopenharmony_ci if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) 1503bf215546Sopenharmony_ci cmd_buffer_emit_subpass_clears(cmd_buffer); 1504bf215546Sopenharmony_ci 1505bf215546Sopenharmony_ci return job; 1506bf215546Sopenharmony_ci} 1507bf215546Sopenharmony_ci 1508bf215546Sopenharmony_cistruct v3dv_job * 1509bf215546Sopenharmony_civ3dv_cmd_buffer_subpass_resume(struct v3dv_cmd_buffer *cmd_buffer, 1510bf215546Sopenharmony_ci uint32_t subpass_idx) 1511bf215546Sopenharmony_ci{ 1512bf215546Sopenharmony_ci assert(cmd_buffer->state.pass); 1513bf215546Sopenharmony_ci assert(subpass_idx < cmd_buffer->state.pass->subpass_count); 1514bf215546Sopenharmony_ci 1515bf215546Sopenharmony_ci struct v3dv_job *job; 1516bf215546Sopenharmony_ci if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { 1517bf215546Sopenharmony_ci job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx, 1518bf215546Sopenharmony_ci V3DV_JOB_TYPE_GPU_CL); 1519bf215546Sopenharmony_ci } else { 1520bf215546Sopenharmony_ci assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); 1521bf215546Sopenharmony_ci job = cmd_buffer_subpass_create_job(cmd_buffer, subpass_idx, 1522bf215546Sopenharmony_ci V3DV_JOB_TYPE_GPU_CL_SECONDARY); 1523bf215546Sopenharmony_ci } 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_ci if (!job) 1526bf215546Sopenharmony_ci return NULL; 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_ci job->is_subpass_continue = true; 1529bf215546Sopenharmony_ci 1530bf215546Sopenharmony_ci return job; 1531bf215546Sopenharmony_ci} 1532bf215546Sopenharmony_ci 1533bf215546Sopenharmony_civoid 1534bf215546Sopenharmony_civ3dv_cmd_buffer_subpass_finish(struct v3dv_cmd_buffer *cmd_buffer) 1535bf215546Sopenharmony_ci{ 1536bf215546Sopenharmony_ci /* We can end up here without a job if the last command recorded into the 1537bf215546Sopenharmony_ci * subpass already finished the job (for example a pipeline barrier). In 1538bf215546Sopenharmony_ci * that case we miss to set the is_subpass_finish flag, but that is not 1539bf215546Sopenharmony_ci * required for proper behavior. 1540bf215546Sopenharmony_ci */ 1541bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 1542bf215546Sopenharmony_ci if (job) 1543bf215546Sopenharmony_ci job->is_subpass_finish = true; 1544bf215546Sopenharmony_ci} 1545bf215546Sopenharmony_ci 1546bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1547bf215546Sopenharmony_civ3dv_CmdEndRenderPass2(VkCommandBuffer commandBuffer, 1548bf215546Sopenharmony_ci const VkSubpassEndInfo *pSubpassEndInfo) 1549bf215546Sopenharmony_ci{ 1550bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 1551bf215546Sopenharmony_ci 1552bf215546Sopenharmony_ci /* Finalize last subpass */ 1553bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1554bf215546Sopenharmony_ci assert(state->subpass_idx == state->pass->subpass_count - 1); 1555bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_finish(cmd_buffer); 1556bf215546Sopenharmony_ci v3dv_cmd_buffer_finish_job(cmd_buffer); 1557bf215546Sopenharmony_ci 1558bf215546Sopenharmony_ci cmd_buffer_subpass_handle_pending_resolves(cmd_buffer); 1559bf215546Sopenharmony_ci 1560bf215546Sopenharmony_ci /* We are no longer inside a render pass */ 1561bf215546Sopenharmony_ci state->framebuffer = NULL; 1562bf215546Sopenharmony_ci state->pass = NULL; 1563bf215546Sopenharmony_ci state->subpass_idx = -1; 1564bf215546Sopenharmony_ci} 1565bf215546Sopenharmony_ci 1566bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 1567bf215546Sopenharmony_civ3dv_EndCommandBuffer(VkCommandBuffer commandBuffer) 1568bf215546Sopenharmony_ci{ 1569bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 1570bf215546Sopenharmony_ci 1571bf215546Sopenharmony_ci if (cmd_buffer->state.oom) 1572bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 1573bf215546Sopenharmony_ci 1574bf215546Sopenharmony_ci /* Primaries should have ended any recording jobs by the time they hit 1575bf215546Sopenharmony_ci * vkEndRenderPass (if we are inside a render pass). Commands outside 1576bf215546Sopenharmony_ci * a render pass instance (for both primaries and secondaries) spawn 1577bf215546Sopenharmony_ci * complete jobs too. So the only case where we can get here without 1578bf215546Sopenharmony_ci * finishing a recording job is when we are recording a secondary 1579bf215546Sopenharmony_ci * inside a render pass. 1580bf215546Sopenharmony_ci */ 1581bf215546Sopenharmony_ci if (cmd_buffer->state.job) { 1582bf215546Sopenharmony_ci assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && 1583bf215546Sopenharmony_ci cmd_buffer->state.pass); 1584bf215546Sopenharmony_ci v3dv_cmd_buffer_finish_job(cmd_buffer); 1585bf215546Sopenharmony_ci } 1586bf215546Sopenharmony_ci 1587bf215546Sopenharmony_ci cmd_buffer->status = V3DV_CMD_BUFFER_STATUS_EXECUTABLE; 1588bf215546Sopenharmony_ci 1589bf215546Sopenharmony_ci return VK_SUCCESS; 1590bf215546Sopenharmony_ci} 1591bf215546Sopenharmony_ci 1592bf215546Sopenharmony_cistatic void 1593bf215546Sopenharmony_ciclone_bo_list(struct v3dv_cmd_buffer *cmd_buffer, 1594bf215546Sopenharmony_ci struct list_head *dst, 1595bf215546Sopenharmony_ci struct list_head *src) 1596bf215546Sopenharmony_ci{ 1597bf215546Sopenharmony_ci assert(cmd_buffer); 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_ci list_inithead(dst); 1600bf215546Sopenharmony_ci list_for_each_entry(struct v3dv_bo, bo, src, list_link) { 1601bf215546Sopenharmony_ci struct v3dv_bo *clone_bo = 1602bf215546Sopenharmony_ci vk_alloc(&cmd_buffer->device->vk.alloc, sizeof(struct v3dv_bo), 8, 1603bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1604bf215546Sopenharmony_ci if (!clone_bo) { 1605bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 1606bf215546Sopenharmony_ci return; 1607bf215546Sopenharmony_ci } 1608bf215546Sopenharmony_ci 1609bf215546Sopenharmony_ci *clone_bo = *bo; 1610bf215546Sopenharmony_ci list_addtail(&clone_bo->list_link, dst); 1611bf215546Sopenharmony_ci } 1612bf215546Sopenharmony_ci} 1613bf215546Sopenharmony_ci 1614bf215546Sopenharmony_ci/* Clones a job for inclusion in the given command buffer. Note that this 1615bf215546Sopenharmony_ci * doesn't make a deep copy so the cloned job it doesn't own any resources. 1616bf215546Sopenharmony_ci * Useful when we need to have a job in more than one list, which happens 1617bf215546Sopenharmony_ci * for jobs recorded in secondary command buffers when we want to execute 1618bf215546Sopenharmony_ci * them in primaries. 1619bf215546Sopenharmony_ci */ 1620bf215546Sopenharmony_cistruct v3dv_job * 1621bf215546Sopenharmony_civ3dv_job_clone_in_cmd_buffer(struct v3dv_job *job, 1622bf215546Sopenharmony_ci struct v3dv_cmd_buffer *cmd_buffer) 1623bf215546Sopenharmony_ci{ 1624bf215546Sopenharmony_ci struct v3dv_job *clone_job = vk_alloc(&job->device->vk.alloc, 1625bf215546Sopenharmony_ci sizeof(struct v3dv_job), 8, 1626bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1627bf215546Sopenharmony_ci if (!clone_job) { 1628bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 1629bf215546Sopenharmony_ci return NULL; 1630bf215546Sopenharmony_ci } 1631bf215546Sopenharmony_ci 1632bf215546Sopenharmony_ci /* Cloned jobs don't duplicate resources! */ 1633bf215546Sopenharmony_ci *clone_job = *job; 1634bf215546Sopenharmony_ci clone_job->is_clone = true; 1635bf215546Sopenharmony_ci clone_job->cmd_buffer = cmd_buffer; 1636bf215546Sopenharmony_ci list_addtail(&clone_job->list_link, &cmd_buffer->jobs); 1637bf215546Sopenharmony_ci 1638bf215546Sopenharmony_ci /* We need to regen the BO lists so that they point to the BO list in the 1639bf215546Sopenharmony_ci * cloned job. Otherwise functions like list_length() will loop forever. 1640bf215546Sopenharmony_ci */ 1641bf215546Sopenharmony_ci if (job->type == V3DV_JOB_TYPE_GPU_CL) { 1642bf215546Sopenharmony_ci clone_bo_list(cmd_buffer, &clone_job->bcl.bo_list, &job->bcl.bo_list); 1643bf215546Sopenharmony_ci clone_bo_list(cmd_buffer, &clone_job->rcl.bo_list, &job->rcl.bo_list); 1644bf215546Sopenharmony_ci clone_bo_list(cmd_buffer, &clone_job->indirect.bo_list, 1645bf215546Sopenharmony_ci &job->indirect.bo_list); 1646bf215546Sopenharmony_ci } 1647bf215546Sopenharmony_ci 1648bf215546Sopenharmony_ci return clone_job; 1649bf215546Sopenharmony_ci} 1650bf215546Sopenharmony_ci 1651bf215546Sopenharmony_civoid 1652bf215546Sopenharmony_civ3dv_cmd_buffer_merge_barrier_state(struct v3dv_barrier_state *dst, 1653bf215546Sopenharmony_ci struct v3dv_barrier_state *src) 1654bf215546Sopenharmony_ci{ 1655bf215546Sopenharmony_ci dst->dst_mask |= src->dst_mask; 1656bf215546Sopenharmony_ci 1657bf215546Sopenharmony_ci dst->src_mask_graphics |= src->src_mask_graphics; 1658bf215546Sopenharmony_ci dst->src_mask_compute |= src->src_mask_compute; 1659bf215546Sopenharmony_ci dst->src_mask_transfer |= src->src_mask_transfer; 1660bf215546Sopenharmony_ci 1661bf215546Sopenharmony_ci dst->bcl_buffer_access |= src->bcl_buffer_access; 1662bf215546Sopenharmony_ci dst->bcl_image_access |= src->bcl_image_access; 1663bf215546Sopenharmony_ci} 1664bf215546Sopenharmony_ci 1665bf215546Sopenharmony_cistatic void 1666bf215546Sopenharmony_cicmd_buffer_execute_outside_pass(struct v3dv_cmd_buffer *primary, 1667bf215546Sopenharmony_ci uint32_t cmd_buffer_count, 1668bf215546Sopenharmony_ci const VkCommandBuffer *cmd_buffers) 1669bf215546Sopenharmony_ci{ 1670bf215546Sopenharmony_ci struct v3dv_barrier_state pending_barrier = { 0 }; 1671bf215546Sopenharmony_ci for (uint32_t i = 0; i < cmd_buffer_count; i++) { 1672bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]); 1673bf215546Sopenharmony_ci 1674bf215546Sopenharmony_ci assert(!(secondary->usage_flags & 1675bf215546Sopenharmony_ci VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)); 1676bf215546Sopenharmony_ci 1677bf215546Sopenharmony_ci /* Secondary command buffers that execute outside a render pass create 1678bf215546Sopenharmony_ci * complete jobs with an RCL and tile setup, so we simply want to merge 1679bf215546Sopenharmony_ci * their job list into the primary's. However, because they may be 1680bf215546Sopenharmony_ci * executed into multiple primaries at the same time and we only have a 1681bf215546Sopenharmony_ci * single list_link in each job, we can't just add then to the primary's 1682bf215546Sopenharmony_ci * job list and we instead have to clone them first. 1683bf215546Sopenharmony_ci * 1684bf215546Sopenharmony_ci * Alternatively, we could create a "execute secondary" CPU job that 1685bf215546Sopenharmony_ci * when executed in a queue, would submit all the jobs in the referenced 1686bf215546Sopenharmony_ci * secondary command buffer. However, this would raise some challenges 1687bf215546Sopenharmony_ci * to make it work with the implementation of wait threads in the queue 1688bf215546Sopenharmony_ci * which we use for event waits, for example. 1689bf215546Sopenharmony_ci */ 1690bf215546Sopenharmony_ci list_for_each_entry(struct v3dv_job, secondary_job, 1691bf215546Sopenharmony_ci &secondary->jobs, list_link) { 1692bf215546Sopenharmony_ci /* These can only happen inside a render pass */ 1693bf215546Sopenharmony_ci assert(secondary_job->type != V3DV_JOB_TYPE_GPU_CL_SECONDARY); 1694bf215546Sopenharmony_ci struct v3dv_job *job = v3dv_job_clone_in_cmd_buffer(secondary_job, primary); 1695bf215546Sopenharmony_ci if (!job) 1696bf215546Sopenharmony_ci return; 1697bf215546Sopenharmony_ci 1698bf215546Sopenharmony_ci if (pending_barrier.dst_mask) { 1699bf215546Sopenharmony_ci /* FIXME: do the same we do for primaries and only choose the 1700bf215546Sopenharmony_ci * relevant src masks. 1701bf215546Sopenharmony_ci */ 1702bf215546Sopenharmony_ci job->serialize = pending_barrier.src_mask_graphics | 1703bf215546Sopenharmony_ci pending_barrier.src_mask_transfer | 1704bf215546Sopenharmony_ci pending_barrier.src_mask_compute; 1705bf215546Sopenharmony_ci if (pending_barrier.bcl_buffer_access || 1706bf215546Sopenharmony_ci pending_barrier.bcl_image_access) { 1707bf215546Sopenharmony_ci job->needs_bcl_sync = true; 1708bf215546Sopenharmony_ci } 1709bf215546Sopenharmony_ci memset(&pending_barrier, 0, sizeof(pending_barrier)); 1710bf215546Sopenharmony_ci } 1711bf215546Sopenharmony_ci } 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_ci /* If this secondary had any pending barrier state we will need that 1714bf215546Sopenharmony_ci * barrier state consumed with whatever comes after it (first job in 1715bf215546Sopenharmony_ci * the next secondary or the primary, if this was the last secondary). 1716bf215546Sopenharmony_ci */ 1717bf215546Sopenharmony_ci assert(secondary->state.barrier.dst_mask || 1718bf215546Sopenharmony_ci (!secondary->state.barrier.bcl_buffer_access && 1719bf215546Sopenharmony_ci !secondary->state.barrier.bcl_image_access)); 1720bf215546Sopenharmony_ci pending_barrier = secondary->state.barrier; 1721bf215546Sopenharmony_ci } 1722bf215546Sopenharmony_ci 1723bf215546Sopenharmony_ci if (pending_barrier.dst_mask) { 1724bf215546Sopenharmony_ci v3dv_cmd_buffer_merge_barrier_state(&primary->state.barrier, 1725bf215546Sopenharmony_ci &pending_barrier); 1726bf215546Sopenharmony_ci } 1727bf215546Sopenharmony_ci} 1728bf215546Sopenharmony_ci 1729bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1730bf215546Sopenharmony_civ3dv_CmdExecuteCommands(VkCommandBuffer commandBuffer, 1731bf215546Sopenharmony_ci uint32_t commandBufferCount, 1732bf215546Sopenharmony_ci const VkCommandBuffer *pCommandBuffers) 1733bf215546Sopenharmony_ci{ 1734bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, primary, commandBuffer); 1735bf215546Sopenharmony_ci 1736bf215546Sopenharmony_ci if (primary->state.pass != NULL) { 1737bf215546Sopenharmony_ci v3dv_X(primary->device, cmd_buffer_execute_inside_pass) 1738bf215546Sopenharmony_ci (primary, commandBufferCount, pCommandBuffers); 1739bf215546Sopenharmony_ci } else { 1740bf215546Sopenharmony_ci cmd_buffer_execute_outside_pass(primary, 1741bf215546Sopenharmony_ci commandBufferCount, pCommandBuffers); 1742bf215546Sopenharmony_ci } 1743bf215546Sopenharmony_ci} 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_ci/* This goes though the list of possible dynamic states in the pipeline and, 1746bf215546Sopenharmony_ci * for those that are not configured as dynamic, copies relevant state into 1747bf215546Sopenharmony_ci * the command buffer. 1748bf215546Sopenharmony_ci */ 1749bf215546Sopenharmony_cistatic void 1750bf215546Sopenharmony_cicmd_buffer_bind_pipeline_static_state(struct v3dv_cmd_buffer *cmd_buffer, 1751bf215546Sopenharmony_ci const struct v3dv_dynamic_state *src) 1752bf215546Sopenharmony_ci{ 1753bf215546Sopenharmony_ci struct v3dv_dynamic_state *dest = &cmd_buffer->state.dynamic; 1754bf215546Sopenharmony_ci uint32_t dynamic_mask = src->mask; 1755bf215546Sopenharmony_ci uint32_t dirty = 0; 1756bf215546Sopenharmony_ci 1757bf215546Sopenharmony_ci if (!(dynamic_mask & V3DV_DYNAMIC_VIEWPORT)) { 1758bf215546Sopenharmony_ci dest->viewport.count = src->viewport.count; 1759bf215546Sopenharmony_ci if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, 1760bf215546Sopenharmony_ci src->viewport.count * sizeof(VkViewport))) { 1761bf215546Sopenharmony_ci typed_memcpy(dest->viewport.viewports, 1762bf215546Sopenharmony_ci src->viewport.viewports, 1763bf215546Sopenharmony_ci src->viewport.count); 1764bf215546Sopenharmony_ci typed_memcpy(dest->viewport.scale, src->viewport.scale, 1765bf215546Sopenharmony_ci src->viewport.count); 1766bf215546Sopenharmony_ci typed_memcpy(dest->viewport.translate, src->viewport.translate, 1767bf215546Sopenharmony_ci src->viewport.count); 1768bf215546Sopenharmony_ci dirty |= V3DV_CMD_DIRTY_VIEWPORT; 1769bf215546Sopenharmony_ci } 1770bf215546Sopenharmony_ci } 1771bf215546Sopenharmony_ci 1772bf215546Sopenharmony_ci if (!(dynamic_mask & V3DV_DYNAMIC_SCISSOR)) { 1773bf215546Sopenharmony_ci dest->scissor.count = src->scissor.count; 1774bf215546Sopenharmony_ci if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, 1775bf215546Sopenharmony_ci src->scissor.count * sizeof(VkRect2D))) { 1776bf215546Sopenharmony_ci typed_memcpy(dest->scissor.scissors, 1777bf215546Sopenharmony_ci src->scissor.scissors, src->scissor.count); 1778bf215546Sopenharmony_ci dirty |= V3DV_CMD_DIRTY_SCISSOR; 1779bf215546Sopenharmony_ci } 1780bf215546Sopenharmony_ci } 1781bf215546Sopenharmony_ci 1782bf215546Sopenharmony_ci if (!(dynamic_mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) { 1783bf215546Sopenharmony_ci if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask, 1784bf215546Sopenharmony_ci sizeof(src->stencil_compare_mask))) { 1785bf215546Sopenharmony_ci dest->stencil_compare_mask = src->stencil_compare_mask; 1786bf215546Sopenharmony_ci dirty |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK; 1787bf215546Sopenharmony_ci } 1788bf215546Sopenharmony_ci } 1789bf215546Sopenharmony_ci 1790bf215546Sopenharmony_ci if (!(dynamic_mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) { 1791bf215546Sopenharmony_ci if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask, 1792bf215546Sopenharmony_ci sizeof(src->stencil_write_mask))) { 1793bf215546Sopenharmony_ci dest->stencil_write_mask = src->stencil_write_mask; 1794bf215546Sopenharmony_ci dirty |= V3DV_CMD_DIRTY_STENCIL_WRITE_MASK; 1795bf215546Sopenharmony_ci } 1796bf215546Sopenharmony_ci } 1797bf215546Sopenharmony_ci 1798bf215546Sopenharmony_ci if (!(dynamic_mask & V3DV_DYNAMIC_STENCIL_REFERENCE)) { 1799bf215546Sopenharmony_ci if (memcmp(&dest->stencil_reference, &src->stencil_reference, 1800bf215546Sopenharmony_ci sizeof(src->stencil_reference))) { 1801bf215546Sopenharmony_ci dest->stencil_reference = src->stencil_reference; 1802bf215546Sopenharmony_ci dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE; 1803bf215546Sopenharmony_ci } 1804bf215546Sopenharmony_ci } 1805bf215546Sopenharmony_ci 1806bf215546Sopenharmony_ci if (!(dynamic_mask & V3DV_DYNAMIC_BLEND_CONSTANTS)) { 1807bf215546Sopenharmony_ci if (memcmp(dest->blend_constants, src->blend_constants, 1808bf215546Sopenharmony_ci sizeof(src->blend_constants))) { 1809bf215546Sopenharmony_ci memcpy(dest->blend_constants, src->blend_constants, 1810bf215546Sopenharmony_ci sizeof(src->blend_constants)); 1811bf215546Sopenharmony_ci dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS; 1812bf215546Sopenharmony_ci } 1813bf215546Sopenharmony_ci } 1814bf215546Sopenharmony_ci 1815bf215546Sopenharmony_ci if (!(dynamic_mask & V3DV_DYNAMIC_DEPTH_BIAS)) { 1816bf215546Sopenharmony_ci if (memcmp(&dest->depth_bias, &src->depth_bias, 1817bf215546Sopenharmony_ci sizeof(src->depth_bias))) { 1818bf215546Sopenharmony_ci memcpy(&dest->depth_bias, &src->depth_bias, sizeof(src->depth_bias)); 1819bf215546Sopenharmony_ci dirty |= V3DV_CMD_DIRTY_DEPTH_BIAS; 1820bf215546Sopenharmony_ci } 1821bf215546Sopenharmony_ci } 1822bf215546Sopenharmony_ci 1823bf215546Sopenharmony_ci if (!(dynamic_mask & V3DV_DYNAMIC_LINE_WIDTH)) { 1824bf215546Sopenharmony_ci if (dest->line_width != src->line_width) { 1825bf215546Sopenharmony_ci dest->line_width = src->line_width; 1826bf215546Sopenharmony_ci dirty |= V3DV_CMD_DIRTY_LINE_WIDTH; 1827bf215546Sopenharmony_ci } 1828bf215546Sopenharmony_ci } 1829bf215546Sopenharmony_ci 1830bf215546Sopenharmony_ci if (!(dynamic_mask & V3DV_DYNAMIC_COLOR_WRITE_ENABLE)) { 1831bf215546Sopenharmony_ci if (dest->color_write_enable != src->color_write_enable) { 1832bf215546Sopenharmony_ci dest->color_write_enable = src->color_write_enable; 1833bf215546Sopenharmony_ci dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; 1834bf215546Sopenharmony_ci } 1835bf215546Sopenharmony_ci } 1836bf215546Sopenharmony_ci 1837bf215546Sopenharmony_ci cmd_buffer->state.dynamic.mask = dynamic_mask; 1838bf215546Sopenharmony_ci cmd_buffer->state.dirty |= dirty; 1839bf215546Sopenharmony_ci} 1840bf215546Sopenharmony_ci 1841bf215546Sopenharmony_cistatic void 1842bf215546Sopenharmony_cibind_graphics_pipeline(struct v3dv_cmd_buffer *cmd_buffer, 1843bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline) 1844bf215546Sopenharmony_ci{ 1845bf215546Sopenharmony_ci assert(pipeline && !(pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT)); 1846bf215546Sopenharmony_ci if (cmd_buffer->state.gfx.pipeline == pipeline) 1847bf215546Sopenharmony_ci return; 1848bf215546Sopenharmony_ci 1849bf215546Sopenharmony_ci cmd_buffer->state.gfx.pipeline = pipeline; 1850bf215546Sopenharmony_ci 1851bf215546Sopenharmony_ci cmd_buffer_bind_pipeline_static_state(cmd_buffer, &pipeline->dynamic_state); 1852bf215546Sopenharmony_ci 1853bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PIPELINE; 1854bf215546Sopenharmony_ci} 1855bf215546Sopenharmony_ci 1856bf215546Sopenharmony_cistatic void 1857bf215546Sopenharmony_cibind_compute_pipeline(struct v3dv_cmd_buffer *cmd_buffer, 1858bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline) 1859bf215546Sopenharmony_ci{ 1860bf215546Sopenharmony_ci assert(pipeline && pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); 1861bf215546Sopenharmony_ci 1862bf215546Sopenharmony_ci if (cmd_buffer->state.compute.pipeline == pipeline) 1863bf215546Sopenharmony_ci return; 1864bf215546Sopenharmony_ci 1865bf215546Sopenharmony_ci cmd_buffer->state.compute.pipeline = pipeline; 1866bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_PIPELINE; 1867bf215546Sopenharmony_ci} 1868bf215546Sopenharmony_ci 1869bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1870bf215546Sopenharmony_civ3dv_CmdBindPipeline(VkCommandBuffer commandBuffer, 1871bf215546Sopenharmony_ci VkPipelineBindPoint pipelineBindPoint, 1872bf215546Sopenharmony_ci VkPipeline _pipeline) 1873bf215546Sopenharmony_ci{ 1874bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 1875bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline); 1876bf215546Sopenharmony_ci 1877bf215546Sopenharmony_ci switch (pipelineBindPoint) { 1878bf215546Sopenharmony_ci case VK_PIPELINE_BIND_POINT_COMPUTE: 1879bf215546Sopenharmony_ci bind_compute_pipeline(cmd_buffer, pipeline); 1880bf215546Sopenharmony_ci break; 1881bf215546Sopenharmony_ci 1882bf215546Sopenharmony_ci case VK_PIPELINE_BIND_POINT_GRAPHICS: 1883bf215546Sopenharmony_ci bind_graphics_pipeline(cmd_buffer, pipeline); 1884bf215546Sopenharmony_ci break; 1885bf215546Sopenharmony_ci 1886bf215546Sopenharmony_ci default: 1887bf215546Sopenharmony_ci assert(!"invalid bind point"); 1888bf215546Sopenharmony_ci break; 1889bf215546Sopenharmony_ci } 1890bf215546Sopenharmony_ci} 1891bf215546Sopenharmony_ci 1892bf215546Sopenharmony_ci/* FIXME: C&P from radv. tu has similar code. Perhaps common place? */ 1893bf215546Sopenharmony_civoid 1894bf215546Sopenharmony_civ3dv_viewport_compute_xform(const VkViewport *viewport, 1895bf215546Sopenharmony_ci float scale[3], 1896bf215546Sopenharmony_ci float translate[3]) 1897bf215546Sopenharmony_ci{ 1898bf215546Sopenharmony_ci float x = viewport->x; 1899bf215546Sopenharmony_ci float y = viewport->y; 1900bf215546Sopenharmony_ci float half_width = 0.5f * viewport->width; 1901bf215546Sopenharmony_ci float half_height = 0.5f * viewport->height; 1902bf215546Sopenharmony_ci double n = viewport->minDepth; 1903bf215546Sopenharmony_ci double f = viewport->maxDepth; 1904bf215546Sopenharmony_ci 1905bf215546Sopenharmony_ci scale[0] = half_width; 1906bf215546Sopenharmony_ci translate[0] = half_width + x; 1907bf215546Sopenharmony_ci scale[1] = half_height; 1908bf215546Sopenharmony_ci translate[1] = half_height + y; 1909bf215546Sopenharmony_ci 1910bf215546Sopenharmony_ci scale[2] = (f - n); 1911bf215546Sopenharmony_ci translate[2] = n; 1912bf215546Sopenharmony_ci 1913bf215546Sopenharmony_ci /* It seems that if the scale is small enough the hardware won't clip 1914bf215546Sopenharmony_ci * correctly so we work around this my choosing the smallest scale that 1915bf215546Sopenharmony_ci * seems to work. 1916bf215546Sopenharmony_ci * 1917bf215546Sopenharmony_ci * This case is exercised by CTS: 1918bf215546Sopenharmony_ci * dEQP-VK.draw.inverted_depth_ranges.nodepthclamp_deltazero 1919bf215546Sopenharmony_ci */ 1920bf215546Sopenharmony_ci const float min_abs_scale = 0.000009f; 1921bf215546Sopenharmony_ci if (fabs(scale[2]) < min_abs_scale) 1922bf215546Sopenharmony_ci scale[2] = min_abs_scale * (scale[2] < 0 ? -1.0f : 1.0f); 1923bf215546Sopenharmony_ci} 1924bf215546Sopenharmony_ci 1925bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1926bf215546Sopenharmony_civ3dv_CmdSetViewport(VkCommandBuffer commandBuffer, 1927bf215546Sopenharmony_ci uint32_t firstViewport, 1928bf215546Sopenharmony_ci uint32_t viewportCount, 1929bf215546Sopenharmony_ci const VkViewport *pViewports) 1930bf215546Sopenharmony_ci{ 1931bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 1932bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1933bf215546Sopenharmony_ci const uint32_t total_count = firstViewport + viewportCount; 1934bf215546Sopenharmony_ci 1935bf215546Sopenharmony_ci assert(firstViewport < MAX_VIEWPORTS); 1936bf215546Sopenharmony_ci assert(total_count >= 1 && total_count <= MAX_VIEWPORTS); 1937bf215546Sopenharmony_ci 1938bf215546Sopenharmony_ci if (state->dynamic.viewport.count < total_count) 1939bf215546Sopenharmony_ci state->dynamic.viewport.count = total_count; 1940bf215546Sopenharmony_ci 1941bf215546Sopenharmony_ci if (!memcmp(state->dynamic.viewport.viewports + firstViewport, 1942bf215546Sopenharmony_ci pViewports, viewportCount * sizeof(*pViewports))) { 1943bf215546Sopenharmony_ci return; 1944bf215546Sopenharmony_ci } 1945bf215546Sopenharmony_ci 1946bf215546Sopenharmony_ci memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports, 1947bf215546Sopenharmony_ci viewportCount * sizeof(*pViewports)); 1948bf215546Sopenharmony_ci 1949bf215546Sopenharmony_ci for (uint32_t i = firstViewport; i < total_count; i++) { 1950bf215546Sopenharmony_ci v3dv_viewport_compute_xform(&state->dynamic.viewport.viewports[i], 1951bf215546Sopenharmony_ci state->dynamic.viewport.scale[i], 1952bf215546Sopenharmony_ci state->dynamic.viewport.translate[i]); 1953bf215546Sopenharmony_ci } 1954bf215546Sopenharmony_ci 1955bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEWPORT; 1956bf215546Sopenharmony_ci} 1957bf215546Sopenharmony_ci 1958bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1959bf215546Sopenharmony_civ3dv_CmdSetScissor(VkCommandBuffer commandBuffer, 1960bf215546Sopenharmony_ci uint32_t firstScissor, 1961bf215546Sopenharmony_ci uint32_t scissorCount, 1962bf215546Sopenharmony_ci const VkRect2D *pScissors) 1963bf215546Sopenharmony_ci{ 1964bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 1965bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1966bf215546Sopenharmony_ci 1967bf215546Sopenharmony_ci assert(firstScissor < MAX_SCISSORS); 1968bf215546Sopenharmony_ci assert(firstScissor + scissorCount >= 1 && 1969bf215546Sopenharmony_ci firstScissor + scissorCount <= MAX_SCISSORS); 1970bf215546Sopenharmony_ci 1971bf215546Sopenharmony_ci if (state->dynamic.scissor.count < firstScissor + scissorCount) 1972bf215546Sopenharmony_ci state->dynamic.scissor.count = firstScissor + scissorCount; 1973bf215546Sopenharmony_ci 1974bf215546Sopenharmony_ci if (!memcmp(state->dynamic.scissor.scissors + firstScissor, 1975bf215546Sopenharmony_ci pScissors, scissorCount * sizeof(*pScissors))) { 1976bf215546Sopenharmony_ci return; 1977bf215546Sopenharmony_ci } 1978bf215546Sopenharmony_ci 1979bf215546Sopenharmony_ci memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors, 1980bf215546Sopenharmony_ci scissorCount * sizeof(*pScissors)); 1981bf215546Sopenharmony_ci 1982bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_SCISSOR; 1983bf215546Sopenharmony_ci} 1984bf215546Sopenharmony_ci 1985bf215546Sopenharmony_cistatic void 1986bf215546Sopenharmony_ciemit_scissor(struct v3dv_cmd_buffer *cmd_buffer) 1987bf215546Sopenharmony_ci{ 1988bf215546Sopenharmony_ci if (cmd_buffer->state.dynamic.viewport.count == 0) 1989bf215546Sopenharmony_ci return; 1990bf215546Sopenharmony_ci 1991bf215546Sopenharmony_ci struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci /* FIXME: right now we only support one viewport. viewporst[0] would work 1994bf215546Sopenharmony_ci * now, but would need to change if we allow multiple viewports. 1995bf215546Sopenharmony_ci */ 1996bf215546Sopenharmony_ci float *vptranslate = dynamic->viewport.translate[0]; 1997bf215546Sopenharmony_ci float *vpscale = dynamic->viewport.scale[0]; 1998bf215546Sopenharmony_ci 1999bf215546Sopenharmony_ci float vp_minx = -fabsf(vpscale[0]) + vptranslate[0]; 2000bf215546Sopenharmony_ci float vp_maxx = fabsf(vpscale[0]) + vptranslate[0]; 2001bf215546Sopenharmony_ci float vp_miny = -fabsf(vpscale[1]) + vptranslate[1]; 2002bf215546Sopenharmony_ci float vp_maxy = fabsf(vpscale[1]) + vptranslate[1]; 2003bf215546Sopenharmony_ci 2004bf215546Sopenharmony_ci /* Quoting from v3dx_emit: 2005bf215546Sopenharmony_ci * "Clip to the scissor if it's enabled, but still clip to the 2006bf215546Sopenharmony_ci * drawable regardless since that controls where the binner 2007bf215546Sopenharmony_ci * tries to put things. 2008bf215546Sopenharmony_ci * 2009bf215546Sopenharmony_ci * Additionally, always clip the rendering to the viewport, 2010bf215546Sopenharmony_ci * since the hardware does guardband clipping, meaning 2011bf215546Sopenharmony_ci * primitives would rasterize outside of the view volume." 2012bf215546Sopenharmony_ci */ 2013bf215546Sopenharmony_ci uint32_t minx, miny, maxx, maxy; 2014bf215546Sopenharmony_ci 2015bf215546Sopenharmony_ci /* From the Vulkan spec: 2016bf215546Sopenharmony_ci * 2017bf215546Sopenharmony_ci * "The application must ensure (using scissor if necessary) that all 2018bf215546Sopenharmony_ci * rendering is contained within the render area. The render area must be 2019bf215546Sopenharmony_ci * contained within the framebuffer dimensions." 2020bf215546Sopenharmony_ci * 2021bf215546Sopenharmony_ci * So it is the application's responsibility to ensure this. Still, we can 2022bf215546Sopenharmony_ci * help by automatically restricting the scissor rect to the render area. 2023bf215546Sopenharmony_ci */ 2024bf215546Sopenharmony_ci minx = MAX2(vp_minx, cmd_buffer->state.render_area.offset.x); 2025bf215546Sopenharmony_ci miny = MAX2(vp_miny, cmd_buffer->state.render_area.offset.y); 2026bf215546Sopenharmony_ci maxx = MIN2(vp_maxx, cmd_buffer->state.render_area.offset.x + 2027bf215546Sopenharmony_ci cmd_buffer->state.render_area.extent.width); 2028bf215546Sopenharmony_ci maxy = MIN2(vp_maxy, cmd_buffer->state.render_area.offset.y + 2029bf215546Sopenharmony_ci cmd_buffer->state.render_area.extent.height); 2030bf215546Sopenharmony_ci 2031bf215546Sopenharmony_ci minx = vp_minx; 2032bf215546Sopenharmony_ci miny = vp_miny; 2033bf215546Sopenharmony_ci maxx = vp_maxx; 2034bf215546Sopenharmony_ci maxy = vp_maxy; 2035bf215546Sopenharmony_ci 2036bf215546Sopenharmony_ci /* Clip against user provided scissor if needed. 2037bf215546Sopenharmony_ci * 2038bf215546Sopenharmony_ci * FIXME: right now we only allow one scissor. Below would need to be 2039bf215546Sopenharmony_ci * updated if we support more 2040bf215546Sopenharmony_ci */ 2041bf215546Sopenharmony_ci if (dynamic->scissor.count > 0) { 2042bf215546Sopenharmony_ci VkRect2D *scissor = &dynamic->scissor.scissors[0]; 2043bf215546Sopenharmony_ci minx = MAX2(minx, scissor->offset.x); 2044bf215546Sopenharmony_ci miny = MAX2(miny, scissor->offset.y); 2045bf215546Sopenharmony_ci maxx = MIN2(maxx, scissor->offset.x + scissor->extent.width); 2046bf215546Sopenharmony_ci maxy = MIN2(maxy, scissor->offset.y + scissor->extent.height); 2047bf215546Sopenharmony_ci } 2048bf215546Sopenharmony_ci 2049bf215546Sopenharmony_ci /* If the scissor is outside the viewport area we end up with 2050bf215546Sopenharmony_ci * min{x,y} > max{x,y}. 2051bf215546Sopenharmony_ci */ 2052bf215546Sopenharmony_ci if (minx > maxx) 2053bf215546Sopenharmony_ci maxx = minx; 2054bf215546Sopenharmony_ci if (miny > maxy) 2055bf215546Sopenharmony_ci maxy = miny; 2056bf215546Sopenharmony_ci 2057bf215546Sopenharmony_ci cmd_buffer->state.clip_window.offset.x = minx; 2058bf215546Sopenharmony_ci cmd_buffer->state.clip_window.offset.y = miny; 2059bf215546Sopenharmony_ci cmd_buffer->state.clip_window.extent.width = maxx - minx; 2060bf215546Sopenharmony_ci cmd_buffer->state.clip_window.extent.height = maxy - miny; 2061bf215546Sopenharmony_ci 2062bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, job_emit_clip_window) 2063bf215546Sopenharmony_ci (cmd_buffer->state.job, &cmd_buffer->state.clip_window); 2064bf215546Sopenharmony_ci 2065bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_SCISSOR; 2066bf215546Sopenharmony_ci} 2067bf215546Sopenharmony_ci 2068bf215546Sopenharmony_cistatic void 2069bf215546Sopenharmony_ciupdate_gfx_uniform_state(struct v3dv_cmd_buffer *cmd_buffer, 2070bf215546Sopenharmony_ci uint32_t dirty_uniform_state) 2071bf215546Sopenharmony_ci{ 2072bf215546Sopenharmony_ci /* We need to update uniform streams if any piece of state that is passed 2073bf215546Sopenharmony_ci * to the shader as a uniform may have changed. 2074bf215546Sopenharmony_ci * 2075bf215546Sopenharmony_ci * If only descriptor sets are dirty then we can safely ignore updates 2076bf215546Sopenharmony_ci * for shader stages that don't access descriptors. 2077bf215546Sopenharmony_ci */ 2078bf215546Sopenharmony_ci 2079bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 2080bf215546Sopenharmony_ci assert(pipeline); 2081bf215546Sopenharmony_ci 2082bf215546Sopenharmony_ci const bool has_new_pipeline = dirty_uniform_state & V3DV_CMD_DIRTY_PIPELINE; 2083bf215546Sopenharmony_ci const bool has_new_viewport = dirty_uniform_state & V3DV_CMD_DIRTY_VIEWPORT; 2084bf215546Sopenharmony_ci const bool has_new_push_constants = dirty_uniform_state & V3DV_CMD_DIRTY_PUSH_CONSTANTS; 2085bf215546Sopenharmony_ci const bool has_new_descriptors = dirty_uniform_state & V3DV_CMD_DIRTY_DESCRIPTOR_SETS; 2086bf215546Sopenharmony_ci const bool has_new_view_index = dirty_uniform_state & V3DV_CMD_DIRTY_VIEW_INDEX; 2087bf215546Sopenharmony_ci 2088bf215546Sopenharmony_ci /* VK_SHADER_STAGE_FRAGMENT_BIT */ 2089bf215546Sopenharmony_ci const bool has_new_descriptors_fs = 2090bf215546Sopenharmony_ci has_new_descriptors && 2091bf215546Sopenharmony_ci (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_FRAGMENT_BIT); 2092bf215546Sopenharmony_ci 2093bf215546Sopenharmony_ci const bool has_new_push_constants_fs = 2094bf215546Sopenharmony_ci has_new_push_constants && 2095bf215546Sopenharmony_ci (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_FRAGMENT_BIT); 2096bf215546Sopenharmony_ci 2097bf215546Sopenharmony_ci const bool needs_fs_update = has_new_pipeline || 2098bf215546Sopenharmony_ci has_new_view_index || 2099bf215546Sopenharmony_ci has_new_push_constants_fs || 2100bf215546Sopenharmony_ci has_new_descriptors_fs; 2101bf215546Sopenharmony_ci 2102bf215546Sopenharmony_ci if (needs_fs_update) { 2103bf215546Sopenharmony_ci struct v3dv_shader_variant *fs_variant = 2104bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 2105bf215546Sopenharmony_ci 2106bf215546Sopenharmony_ci cmd_buffer->state.uniforms.fs = 2107bf215546Sopenharmony_ci v3dv_write_uniforms(cmd_buffer, pipeline, fs_variant); 2108bf215546Sopenharmony_ci } 2109bf215546Sopenharmony_ci 2110bf215546Sopenharmony_ci /* VK_SHADER_STAGE_GEOMETRY_BIT */ 2111bf215546Sopenharmony_ci if (pipeline->has_gs) { 2112bf215546Sopenharmony_ci const bool has_new_descriptors_gs = 2113bf215546Sopenharmony_ci has_new_descriptors && 2114bf215546Sopenharmony_ci (cmd_buffer->state.dirty_descriptor_stages & 2115bf215546Sopenharmony_ci VK_SHADER_STAGE_GEOMETRY_BIT); 2116bf215546Sopenharmony_ci 2117bf215546Sopenharmony_ci const bool has_new_push_constants_gs = 2118bf215546Sopenharmony_ci has_new_push_constants && 2119bf215546Sopenharmony_ci (cmd_buffer->state.dirty_push_constants_stages & 2120bf215546Sopenharmony_ci VK_SHADER_STAGE_GEOMETRY_BIT); 2121bf215546Sopenharmony_ci 2122bf215546Sopenharmony_ci const bool needs_gs_update = has_new_viewport || 2123bf215546Sopenharmony_ci has_new_view_index || 2124bf215546Sopenharmony_ci has_new_pipeline || 2125bf215546Sopenharmony_ci has_new_push_constants_gs || 2126bf215546Sopenharmony_ci has_new_descriptors_gs; 2127bf215546Sopenharmony_ci 2128bf215546Sopenharmony_ci if (needs_gs_update) { 2129bf215546Sopenharmony_ci struct v3dv_shader_variant *gs_variant = 2130bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 2131bf215546Sopenharmony_ci 2132bf215546Sopenharmony_ci struct v3dv_shader_variant *gs_bin_variant = 2133bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 2134bf215546Sopenharmony_ci 2135bf215546Sopenharmony_ci cmd_buffer->state.uniforms.gs = 2136bf215546Sopenharmony_ci v3dv_write_uniforms(cmd_buffer, pipeline, gs_variant); 2137bf215546Sopenharmony_ci 2138bf215546Sopenharmony_ci cmd_buffer->state.uniforms.gs_bin = 2139bf215546Sopenharmony_ci v3dv_write_uniforms(cmd_buffer, pipeline, gs_bin_variant); 2140bf215546Sopenharmony_ci } 2141bf215546Sopenharmony_ci } 2142bf215546Sopenharmony_ci 2143bf215546Sopenharmony_ci /* VK_SHADER_STAGE_VERTEX_BIT */ 2144bf215546Sopenharmony_ci const bool has_new_descriptors_vs = 2145bf215546Sopenharmony_ci has_new_descriptors && 2146bf215546Sopenharmony_ci (cmd_buffer->state.dirty_descriptor_stages & VK_SHADER_STAGE_VERTEX_BIT); 2147bf215546Sopenharmony_ci 2148bf215546Sopenharmony_ci const bool has_new_push_constants_vs = 2149bf215546Sopenharmony_ci has_new_push_constants && 2150bf215546Sopenharmony_ci (cmd_buffer->state.dirty_push_constants_stages & VK_SHADER_STAGE_VERTEX_BIT); 2151bf215546Sopenharmony_ci 2152bf215546Sopenharmony_ci const bool needs_vs_update = has_new_viewport || 2153bf215546Sopenharmony_ci has_new_view_index || 2154bf215546Sopenharmony_ci has_new_pipeline || 2155bf215546Sopenharmony_ci has_new_push_constants_vs || 2156bf215546Sopenharmony_ci has_new_descriptors_vs; 2157bf215546Sopenharmony_ci 2158bf215546Sopenharmony_ci if (needs_vs_update) { 2159bf215546Sopenharmony_ci struct v3dv_shader_variant *vs_variant = 2160bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 2161bf215546Sopenharmony_ci 2162bf215546Sopenharmony_ci struct v3dv_shader_variant *vs_bin_variant = 2163bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; 2164bf215546Sopenharmony_ci 2165bf215546Sopenharmony_ci cmd_buffer->state.uniforms.vs = 2166bf215546Sopenharmony_ci v3dv_write_uniforms(cmd_buffer, pipeline, vs_variant); 2167bf215546Sopenharmony_ci 2168bf215546Sopenharmony_ci cmd_buffer->state.uniforms.vs_bin = 2169bf215546Sopenharmony_ci v3dv_write_uniforms(cmd_buffer, pipeline, vs_bin_variant); 2170bf215546Sopenharmony_ci } 2171bf215546Sopenharmony_ci 2172bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEW_INDEX; 2173bf215546Sopenharmony_ci} 2174bf215546Sopenharmony_ci 2175bf215546Sopenharmony_ci/* This stores command buffer state that we might be about to stomp for 2176bf215546Sopenharmony_ci * a meta operation. 2177bf215546Sopenharmony_ci */ 2178bf215546Sopenharmony_civoid 2179bf215546Sopenharmony_civ3dv_cmd_buffer_meta_state_push(struct v3dv_cmd_buffer *cmd_buffer, 2180bf215546Sopenharmony_ci bool push_descriptor_state) 2181bf215546Sopenharmony_ci{ 2182bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 2183bf215546Sopenharmony_ci 2184bf215546Sopenharmony_ci if (state->subpass_idx != -1) { 2185bf215546Sopenharmony_ci state->meta.subpass_idx = state->subpass_idx; 2186bf215546Sopenharmony_ci state->meta.framebuffer = v3dv_framebuffer_to_handle(state->framebuffer); 2187bf215546Sopenharmony_ci state->meta.pass = v3dv_render_pass_to_handle(state->pass); 2188bf215546Sopenharmony_ci 2189bf215546Sopenharmony_ci const uint32_t attachment_state_item_size = 2190bf215546Sopenharmony_ci sizeof(struct v3dv_cmd_buffer_attachment_state); 2191bf215546Sopenharmony_ci const uint32_t attachment_state_total_size = 2192bf215546Sopenharmony_ci attachment_state_item_size * state->attachment_alloc_count; 2193bf215546Sopenharmony_ci if (state->meta.attachment_alloc_count < state->attachment_alloc_count) { 2194bf215546Sopenharmony_ci if (state->meta.attachment_alloc_count > 0) 2195bf215546Sopenharmony_ci vk_free(&cmd_buffer->device->vk.alloc, state->meta.attachments); 2196bf215546Sopenharmony_ci 2197bf215546Sopenharmony_ci state->meta.attachments = vk_zalloc(&cmd_buffer->device->vk.alloc, 2198bf215546Sopenharmony_ci attachment_state_total_size, 8, 2199bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 2200bf215546Sopenharmony_ci if (!state->meta.attachments) { 2201bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 2202bf215546Sopenharmony_ci return; 2203bf215546Sopenharmony_ci } 2204bf215546Sopenharmony_ci state->meta.attachment_alloc_count = state->attachment_alloc_count; 2205bf215546Sopenharmony_ci } 2206bf215546Sopenharmony_ci state->meta.attachment_count = state->attachment_alloc_count; 2207bf215546Sopenharmony_ci memcpy(state->meta.attachments, state->attachments, 2208bf215546Sopenharmony_ci attachment_state_total_size); 2209bf215546Sopenharmony_ci 2210bf215546Sopenharmony_ci state->meta.tile_aligned_render_area = state->tile_aligned_render_area; 2211bf215546Sopenharmony_ci memcpy(&state->meta.render_area, &state->render_area, sizeof(VkRect2D)); 2212bf215546Sopenharmony_ci } 2213bf215546Sopenharmony_ci 2214bf215546Sopenharmony_ci /* We expect that meta operations are graphics-only, so we only take into 2215bf215546Sopenharmony_ci * account the graphics pipeline, and the graphics state 2216bf215546Sopenharmony_ci */ 2217bf215546Sopenharmony_ci state->meta.gfx.pipeline = state->gfx.pipeline; 2218bf215546Sopenharmony_ci memcpy(&state->meta.dynamic, &state->dynamic, sizeof(state->dynamic)); 2219bf215546Sopenharmony_ci 2220bf215546Sopenharmony_ci struct v3dv_descriptor_state *gfx_descriptor_state = 2221bf215546Sopenharmony_ci &cmd_buffer->state.gfx.descriptor_state; 2222bf215546Sopenharmony_ci 2223bf215546Sopenharmony_ci if (push_descriptor_state) { 2224bf215546Sopenharmony_ci if (gfx_descriptor_state->valid != 0) { 2225bf215546Sopenharmony_ci memcpy(&state->meta.gfx.descriptor_state, gfx_descriptor_state, 2226bf215546Sopenharmony_ci sizeof(state->gfx.descriptor_state)); 2227bf215546Sopenharmony_ci } 2228bf215546Sopenharmony_ci state->meta.has_descriptor_state = true; 2229bf215546Sopenharmony_ci } else { 2230bf215546Sopenharmony_ci state->meta.has_descriptor_state = false; 2231bf215546Sopenharmony_ci } 2232bf215546Sopenharmony_ci 2233bf215546Sopenharmony_ci if (cmd_buffer->state.push_constants_size > 0) { 2234bf215546Sopenharmony_ci state->meta.push_constants_size = cmd_buffer->state.push_constants_size; 2235bf215546Sopenharmony_ci memcpy(state->meta.push_constants, cmd_buffer->state.push_constants_data, 2236bf215546Sopenharmony_ci cmd_buffer->state.push_constants_size); 2237bf215546Sopenharmony_ci cmd_buffer->state.push_constants_size = 0; 2238bf215546Sopenharmony_ci } 2239bf215546Sopenharmony_ci} 2240bf215546Sopenharmony_ci 2241bf215546Sopenharmony_ci/* This restores command buffer state after a meta operation 2242bf215546Sopenharmony_ci */ 2243bf215546Sopenharmony_civoid 2244bf215546Sopenharmony_civ3dv_cmd_buffer_meta_state_pop(struct v3dv_cmd_buffer *cmd_buffer, 2245bf215546Sopenharmony_ci uint32_t dirty_dynamic_state, 2246bf215546Sopenharmony_ci bool needs_subpass_resume) 2247bf215546Sopenharmony_ci{ 2248bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 2249bf215546Sopenharmony_ci 2250bf215546Sopenharmony_ci if (state->meta.subpass_idx != -1) { 2251bf215546Sopenharmony_ci state->pass = v3dv_render_pass_from_handle(state->meta.pass); 2252bf215546Sopenharmony_ci state->framebuffer = v3dv_framebuffer_from_handle(state->meta.framebuffer); 2253bf215546Sopenharmony_ci 2254bf215546Sopenharmony_ci assert(state->meta.attachment_count <= state->attachment_alloc_count); 2255bf215546Sopenharmony_ci const uint32_t attachment_state_item_size = 2256bf215546Sopenharmony_ci sizeof(struct v3dv_cmd_buffer_attachment_state); 2257bf215546Sopenharmony_ci const uint32_t attachment_state_total_size = 2258bf215546Sopenharmony_ci attachment_state_item_size * state->meta.attachment_count; 2259bf215546Sopenharmony_ci memcpy(state->attachments, state->meta.attachments, 2260bf215546Sopenharmony_ci attachment_state_total_size); 2261bf215546Sopenharmony_ci 2262bf215546Sopenharmony_ci state->tile_aligned_render_area = state->meta.tile_aligned_render_area; 2263bf215546Sopenharmony_ci memcpy(&state->render_area, &state->meta.render_area, sizeof(VkRect2D)); 2264bf215546Sopenharmony_ci 2265bf215546Sopenharmony_ci /* Is needs_subpass_resume is true it means that the emitted the meta 2266bf215546Sopenharmony_ci * operation in its own job (possibly with an RT config that is 2267bf215546Sopenharmony_ci * incompatible with the current subpass), so resuming subpass execution 2268bf215546Sopenharmony_ci * after it requires that we create a new job with the subpass RT setup. 2269bf215546Sopenharmony_ci */ 2270bf215546Sopenharmony_ci if (needs_subpass_resume) 2271bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_resume(cmd_buffer, state->meta.subpass_idx); 2272bf215546Sopenharmony_ci } else { 2273bf215546Sopenharmony_ci state->subpass_idx = -1; 2274bf215546Sopenharmony_ci } 2275bf215546Sopenharmony_ci 2276bf215546Sopenharmony_ci if (state->meta.gfx.pipeline != NULL) { 2277bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = state->meta.gfx.pipeline; 2278bf215546Sopenharmony_ci VkPipelineBindPoint pipeline_binding = 2279bf215546Sopenharmony_ci v3dv_pipeline_get_binding_point(pipeline); 2280bf215546Sopenharmony_ci v3dv_CmdBindPipeline(v3dv_cmd_buffer_to_handle(cmd_buffer), 2281bf215546Sopenharmony_ci pipeline_binding, 2282bf215546Sopenharmony_ci v3dv_pipeline_to_handle(state->meta.gfx.pipeline)); 2283bf215546Sopenharmony_ci } else { 2284bf215546Sopenharmony_ci state->gfx.pipeline = NULL; 2285bf215546Sopenharmony_ci } 2286bf215546Sopenharmony_ci 2287bf215546Sopenharmony_ci if (dirty_dynamic_state) { 2288bf215546Sopenharmony_ci memcpy(&state->dynamic, &state->meta.dynamic, sizeof(state->dynamic)); 2289bf215546Sopenharmony_ci state->dirty |= dirty_dynamic_state; 2290bf215546Sopenharmony_ci } 2291bf215546Sopenharmony_ci 2292bf215546Sopenharmony_ci if (state->meta.has_descriptor_state) { 2293bf215546Sopenharmony_ci if (state->meta.gfx.descriptor_state.valid != 0) { 2294bf215546Sopenharmony_ci memcpy(&state->gfx.descriptor_state, &state->meta.gfx.descriptor_state, 2295bf215546Sopenharmony_ci sizeof(state->gfx.descriptor_state)); 2296bf215546Sopenharmony_ci } else { 2297bf215546Sopenharmony_ci state->gfx.descriptor_state.valid = 0; 2298bf215546Sopenharmony_ci } 2299bf215546Sopenharmony_ci } 2300bf215546Sopenharmony_ci 2301bf215546Sopenharmony_ci /* We only need to restore push constant data if we had any data in the 2302bf215546Sopenharmony_ci * original command buffer and the meta operation wrote new push constant 2303bf215546Sopenharmony_ci * data. 2304bf215546Sopenharmony_ci */ 2305bf215546Sopenharmony_ci if (state->meta.push_constants_size > 0 && 2306bf215546Sopenharmony_ci cmd_buffer->state.push_constants_size > 0) { 2307bf215546Sopenharmony_ci memcpy(cmd_buffer->state.push_constants_data, state->meta.push_constants, 2308bf215546Sopenharmony_ci state->meta.push_constants_size); 2309bf215546Sopenharmony_ci } 2310bf215546Sopenharmony_ci cmd_buffer->state.push_constants_size = state->meta.push_constants_size; 2311bf215546Sopenharmony_ci 2312bf215546Sopenharmony_ci state->meta.gfx.pipeline = NULL; 2313bf215546Sopenharmony_ci state->meta.framebuffer = VK_NULL_HANDLE; 2314bf215546Sopenharmony_ci state->meta.pass = VK_NULL_HANDLE; 2315bf215546Sopenharmony_ci state->meta.subpass_idx = -1; 2316bf215546Sopenharmony_ci state->meta.has_descriptor_state = false; 2317bf215546Sopenharmony_ci state->meta.push_constants_size = 0; 2318bf215546Sopenharmony_ci} 2319bf215546Sopenharmony_ci 2320bf215546Sopenharmony_cistatic struct v3dv_job * 2321bf215546Sopenharmony_cicmd_buffer_pre_draw_split_job(struct v3dv_cmd_buffer *cmd_buffer) 2322bf215546Sopenharmony_ci{ 2323bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 2324bf215546Sopenharmony_ci assert(job); 2325bf215546Sopenharmony_ci 2326bf215546Sopenharmony_ci /* If the job has been flagged with 'always_flush' and it has already 2327bf215546Sopenharmony_ci * recorded any draw calls then we need to start a new job for it. 2328bf215546Sopenharmony_ci */ 2329bf215546Sopenharmony_ci if (job->always_flush && job->draw_count > 0) { 2330bf215546Sopenharmony_ci assert(cmd_buffer->state.pass); 2331bf215546Sopenharmony_ci /* First, flag the current job as not being the last in the 2332bf215546Sopenharmony_ci * current subpass 2333bf215546Sopenharmony_ci */ 2334bf215546Sopenharmony_ci job->is_subpass_finish = false; 2335bf215546Sopenharmony_ci 2336bf215546Sopenharmony_ci /* Now start a new job in the same subpass and flag it as continuing 2337bf215546Sopenharmony_ci * the current subpass. 2338bf215546Sopenharmony_ci */ 2339bf215546Sopenharmony_ci job = v3dv_cmd_buffer_subpass_resume(cmd_buffer, 2340bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx); 2341bf215546Sopenharmony_ci assert(job->draw_count == 0); 2342bf215546Sopenharmony_ci 2343bf215546Sopenharmony_ci /* Inherit the 'always flush' behavior */ 2344bf215546Sopenharmony_ci job->always_flush = true; 2345bf215546Sopenharmony_ci } 2346bf215546Sopenharmony_ci 2347bf215546Sopenharmony_ci assert(job->draw_count == 0 || !job->always_flush); 2348bf215546Sopenharmony_ci return job; 2349bf215546Sopenharmony_ci} 2350bf215546Sopenharmony_ci 2351bf215546Sopenharmony_ci/** 2352bf215546Sopenharmony_ci * The Vulkan spec states: 2353bf215546Sopenharmony_ci * 2354bf215546Sopenharmony_ci * "It is legal for a subpass to use no color or depth/stencil 2355bf215546Sopenharmony_ci * attachments (...) This kind of subpass can use shader side effects such 2356bf215546Sopenharmony_ci * as image stores and atomics to produce an output. In this case, the 2357bf215546Sopenharmony_ci * subpass continues to use the width, height, and layers of the framebuffer 2358bf215546Sopenharmony_ci * to define the dimensions of the rendering area, and the 2359bf215546Sopenharmony_ci * rasterizationSamples from each pipeline’s 2360bf215546Sopenharmony_ci * VkPipelineMultisampleStateCreateInfo to define the number of samples used 2361bf215546Sopenharmony_ci * in rasterization." 2362bf215546Sopenharmony_ci * 2363bf215546Sopenharmony_ci * We need to enable MSAA in the TILE_BINNING_MODE_CFG packet, which we 2364bf215546Sopenharmony_ci * emit when we start a new frame at the begining of a subpass. At that point, 2365bf215546Sopenharmony_ci * if the framebuffer doesn't have any attachments we won't enable MSAA and 2366bf215546Sopenharmony_ci * the job won't be valid in the scenario described by the spec. 2367bf215546Sopenharmony_ci * 2368bf215546Sopenharmony_ci * This function is intended to be called before a draw call and will test if 2369bf215546Sopenharmony_ci * we are in that scenario, in which case, it will restart the current job 2370bf215546Sopenharmony_ci * with MSAA enabled. 2371bf215546Sopenharmony_ci */ 2372bf215546Sopenharmony_cistatic void 2373bf215546Sopenharmony_cicmd_buffer_restart_job_for_msaa_if_needed(struct v3dv_cmd_buffer *cmd_buffer) 2374bf215546Sopenharmony_ci{ 2375bf215546Sopenharmony_ci assert(cmd_buffer->state.job); 2376bf215546Sopenharmony_ci 2377bf215546Sopenharmony_ci /* We don't support variableMultisampleRate so we know that all pipelines 2378bf215546Sopenharmony_ci * bound in the same subpass must have matching number of samples, so we 2379bf215546Sopenharmony_ci * can do this check only on the first draw call. 2380bf215546Sopenharmony_ci */ 2381bf215546Sopenharmony_ci if (cmd_buffer->state.job->draw_count > 0) 2382bf215546Sopenharmony_ci return; 2383bf215546Sopenharmony_ci 2384bf215546Sopenharmony_ci /* We only need to restart the frame if the pipeline requires MSAA but 2385bf215546Sopenharmony_ci * our frame tiling didn't enable it. 2386bf215546Sopenharmony_ci */ 2387bf215546Sopenharmony_ci if (!cmd_buffer->state.gfx.pipeline->msaa || 2388bf215546Sopenharmony_ci cmd_buffer->state.job->frame_tiling.msaa) { 2389bf215546Sopenharmony_ci return; 2390bf215546Sopenharmony_ci } 2391bf215546Sopenharmony_ci 2392bf215546Sopenharmony_ci /* FIXME: Secondary command buffers don't start frames. Instead, they are 2393bf215546Sopenharmony_ci * recorded into primary jobs that start them. For secondaries, we should 2394bf215546Sopenharmony_ci * still handle this scenario, but we should do that when we record them 2395bf215546Sopenharmony_ci * into primaries by testing if any of the secondaries has multisampled 2396bf215546Sopenharmony_ci * draw calls in them, and then using that info to decide if we need to 2397bf215546Sopenharmony_ci * restart the primary job into which they are being recorded. 2398bf215546Sopenharmony_ci */ 2399bf215546Sopenharmony_ci if (cmd_buffer->vk.level != VK_COMMAND_BUFFER_LEVEL_PRIMARY) 2400bf215546Sopenharmony_ci return; 2401bf215546Sopenharmony_ci 2402bf215546Sopenharmony_ci /* Drop the current job and restart it with MSAA enabled */ 2403bf215546Sopenharmony_ci struct v3dv_job *old_job = cmd_buffer->state.job; 2404bf215546Sopenharmony_ci cmd_buffer->state.job = NULL; 2405bf215546Sopenharmony_ci 2406bf215546Sopenharmony_ci struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc, 2407bf215546Sopenharmony_ci sizeof(struct v3dv_job), 8, 2408bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 2409bf215546Sopenharmony_ci if (!job) { 2410bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 2411bf215546Sopenharmony_ci return; 2412bf215546Sopenharmony_ci } 2413bf215546Sopenharmony_ci 2414bf215546Sopenharmony_ci v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CL, cmd_buffer->device, cmd_buffer, 2415bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx); 2416bf215546Sopenharmony_ci cmd_buffer->state.job = job; 2417bf215546Sopenharmony_ci 2418bf215546Sopenharmony_ci v3dv_job_start_frame(job, 2419bf215546Sopenharmony_ci old_job->frame_tiling.width, 2420bf215546Sopenharmony_ci old_job->frame_tiling.height, 2421bf215546Sopenharmony_ci old_job->frame_tiling.layers, 2422bf215546Sopenharmony_ci true, 2423bf215546Sopenharmony_ci old_job->frame_tiling.render_target_count, 2424bf215546Sopenharmony_ci old_job->frame_tiling.internal_bpp, 2425bf215546Sopenharmony_ci true /* msaa */); 2426bf215546Sopenharmony_ci 2427bf215546Sopenharmony_ci v3dv_job_destroy(old_job); 2428bf215546Sopenharmony_ci} 2429bf215546Sopenharmony_ci 2430bf215546Sopenharmony_cistatic bool 2431bf215546Sopenharmony_cicmd_buffer_binning_sync_required(struct v3dv_cmd_buffer *cmd_buffer, 2432bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline, 2433bf215546Sopenharmony_ci bool indexed, bool indirect) 2434bf215546Sopenharmony_ci{ 2435bf215546Sopenharmony_ci const struct v3dv_descriptor_maps *vs_bin_maps = 2436bf215546Sopenharmony_ci pipeline->shared_data->maps[BROADCOM_SHADER_VERTEX_BIN]; 2437bf215546Sopenharmony_ci 2438bf215546Sopenharmony_ci const struct v3dv_descriptor_maps *gs_bin_maps = 2439bf215546Sopenharmony_ci pipeline->shared_data->maps[BROADCOM_SHADER_GEOMETRY_BIN]; 2440bf215546Sopenharmony_ci 2441bf215546Sopenharmony_ci VkAccessFlags buffer_access = 2442bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_buffer_access; 2443bf215546Sopenharmony_ci if (buffer_access) { 2444bf215546Sopenharmony_ci /* Index buffer read */ 2445bf215546Sopenharmony_ci if (indexed && (buffer_access & VK_ACCESS_INDEX_READ_BIT)) 2446bf215546Sopenharmony_ci return true; 2447bf215546Sopenharmony_ci 2448bf215546Sopenharmony_ci /* Indirect buffer read */ 2449bf215546Sopenharmony_ci if (indirect && (buffer_access & VK_ACCESS_INDIRECT_COMMAND_READ_BIT)) 2450bf215546Sopenharmony_ci return true; 2451bf215546Sopenharmony_ci 2452bf215546Sopenharmony_ci /* Attribute read */ 2453bf215546Sopenharmony_ci if (buffer_access & VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT) { 2454bf215546Sopenharmony_ci const struct v3d_vs_prog_data *prog_data = 2455bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs; 2456bf215546Sopenharmony_ci 2457bf215546Sopenharmony_ci for (int i = 0; i < ARRAY_SIZE(prog_data->vattr_sizes); i++) { 2458bf215546Sopenharmony_ci if (prog_data->vattr_sizes[i] > 0) 2459bf215546Sopenharmony_ci return true; 2460bf215546Sopenharmony_ci } 2461bf215546Sopenharmony_ci } 2462bf215546Sopenharmony_ci 2463bf215546Sopenharmony_ci /* UBO / SSBO read */ 2464bf215546Sopenharmony_ci if (buffer_access & (VK_ACCESS_UNIFORM_READ_BIT | 2465bf215546Sopenharmony_ci VK_ACCESS_SHADER_READ_BIT | 2466bf215546Sopenharmony_ci VK_ACCESS_MEMORY_READ_BIT)) { 2467bf215546Sopenharmony_ci 2468bf215546Sopenharmony_ci if (vs_bin_maps->ubo_map.num_desc > 0 || 2469bf215546Sopenharmony_ci vs_bin_maps->ssbo_map.num_desc > 0) { 2470bf215546Sopenharmony_ci return true; 2471bf215546Sopenharmony_ci } 2472bf215546Sopenharmony_ci 2473bf215546Sopenharmony_ci if (gs_bin_maps && (gs_bin_maps->ubo_map.num_desc > 0 || 2474bf215546Sopenharmony_ci gs_bin_maps->ssbo_map.num_desc > 0)) { 2475bf215546Sopenharmony_ci return true; 2476bf215546Sopenharmony_ci } 2477bf215546Sopenharmony_ci } 2478bf215546Sopenharmony_ci 2479bf215546Sopenharmony_ci /* SSBO write */ 2480bf215546Sopenharmony_ci if (buffer_access & (VK_ACCESS_SHADER_WRITE_BIT | 2481bf215546Sopenharmony_ci VK_ACCESS_MEMORY_WRITE_BIT)) { 2482bf215546Sopenharmony_ci if (vs_bin_maps->ssbo_map.num_desc > 0) 2483bf215546Sopenharmony_ci return true; 2484bf215546Sopenharmony_ci 2485bf215546Sopenharmony_ci if (gs_bin_maps && gs_bin_maps->ssbo_map.num_desc > 0) 2486bf215546Sopenharmony_ci return true; 2487bf215546Sopenharmony_ci } 2488bf215546Sopenharmony_ci } 2489bf215546Sopenharmony_ci 2490bf215546Sopenharmony_ci VkAccessFlags image_access = 2491bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_image_access; 2492bf215546Sopenharmony_ci if (image_access) { 2493bf215546Sopenharmony_ci /* Image load / store */ 2494bf215546Sopenharmony_ci if (image_access & (VK_ACCESS_SHADER_READ_BIT | 2495bf215546Sopenharmony_ci VK_ACCESS_SHADER_WRITE_BIT | 2496bf215546Sopenharmony_ci VK_ACCESS_MEMORY_READ_BIT | 2497bf215546Sopenharmony_ci VK_ACCESS_MEMORY_WRITE_BIT)) { 2498bf215546Sopenharmony_ci if (vs_bin_maps->texture_map.num_desc > 0 || 2499bf215546Sopenharmony_ci vs_bin_maps->sampler_map.num_desc > 0) { 2500bf215546Sopenharmony_ci return true; 2501bf215546Sopenharmony_ci } 2502bf215546Sopenharmony_ci } 2503bf215546Sopenharmony_ci } 2504bf215546Sopenharmony_ci 2505bf215546Sopenharmony_ci return false; 2506bf215546Sopenharmony_ci} 2507bf215546Sopenharmony_ci 2508bf215546Sopenharmony_cistatic void 2509bf215546Sopenharmony_ciconsume_bcl_sync(struct v3dv_cmd_buffer *cmd_buffer, struct v3dv_job *job) 2510bf215546Sopenharmony_ci{ 2511bf215546Sopenharmony_ci job->needs_bcl_sync = true; 2512bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_buffer_access = 0; 2513bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_image_access = 0; 2514bf215546Sopenharmony_ci} 2515bf215546Sopenharmony_ci 2516bf215546Sopenharmony_civoid 2517bf215546Sopenharmony_civ3dv_cmd_buffer_emit_pre_draw(struct v3dv_cmd_buffer *cmd_buffer, 2518bf215546Sopenharmony_ci bool indexed, bool indirect) 2519bf215546Sopenharmony_ci{ 2520bf215546Sopenharmony_ci assert(cmd_buffer->state.gfx.pipeline); 2521bf215546Sopenharmony_ci assert(!(cmd_buffer->state.gfx.pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT)); 2522bf215546Sopenharmony_ci 2523bf215546Sopenharmony_ci /* If we emitted a pipeline barrier right before this draw we won't have 2524bf215546Sopenharmony_ci * an active job. In that case, create a new job continuing the current 2525bf215546Sopenharmony_ci * subpass. 2526bf215546Sopenharmony_ci */ 2527bf215546Sopenharmony_ci if (!cmd_buffer->state.job) { 2528bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_resume(cmd_buffer, 2529bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx); 2530bf215546Sopenharmony_ci } 2531bf215546Sopenharmony_ci 2532bf215546Sopenharmony_ci /* Restart single sample job for MSAA pipeline if needed */ 2533bf215546Sopenharmony_ci cmd_buffer_restart_job_for_msaa_if_needed(cmd_buffer); 2534bf215546Sopenharmony_ci 2535bf215546Sopenharmony_ci /* If the job is configured to flush on every draw call we need to create 2536bf215546Sopenharmony_ci * a new job now. 2537bf215546Sopenharmony_ci */ 2538bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer_pre_draw_split_job(cmd_buffer); 2539bf215546Sopenharmony_ci job->draw_count++; 2540bf215546Sopenharmony_ci 2541bf215546Sopenharmony_ci /* Track VK_KHR_buffer_device_address usage in the job */ 2542bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 2543bf215546Sopenharmony_ci job->uses_buffer_device_address |= pipeline->uses_buffer_device_address; 2544bf215546Sopenharmony_ci 2545bf215546Sopenharmony_ci /* If this job is serialized (has consumed a barrier) then check if we need 2546bf215546Sopenharmony_ci * to sync at the binning stage by testing if the binning shaders involved 2547bf215546Sopenharmony_ci * with the draw call require access to external resources. 2548bf215546Sopenharmony_ci */ 2549bf215546Sopenharmony_ci if (job->serialize && (cmd_buffer->state.barrier.bcl_buffer_access || 2550bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_image_access)) { 2551bf215546Sopenharmony_ci assert(!job->needs_bcl_sync); 2552bf215546Sopenharmony_ci if (cmd_buffer_binning_sync_required(cmd_buffer, pipeline, 2553bf215546Sopenharmony_ci indexed, indirect)) { 2554bf215546Sopenharmony_ci consume_bcl_sync(cmd_buffer, job); 2555bf215546Sopenharmony_ci } 2556bf215546Sopenharmony_ci } 2557bf215546Sopenharmony_ci 2558bf215546Sopenharmony_ci /* GL shader state binds shaders, uniform and vertex attribute state. The 2559bf215546Sopenharmony_ci * compiler injects uniforms to handle some descriptor types (such as 2560bf215546Sopenharmony_ci * textures), so we need to regen that when descriptor state changes. 2561bf215546Sopenharmony_ci * 2562bf215546Sopenharmony_ci * We also need to emit new shader state if we have a dirty viewport since 2563bf215546Sopenharmony_ci * that will require that we new uniform state for QUNIFORM_VIEWPORT_*. 2564bf215546Sopenharmony_ci */ 2565bf215546Sopenharmony_ci uint32_t *dirty = &cmd_buffer->state.dirty; 2566bf215546Sopenharmony_ci 2567bf215546Sopenharmony_ci const uint32_t dirty_uniform_state = 2568bf215546Sopenharmony_ci *dirty & (V3DV_CMD_DIRTY_PIPELINE | 2569bf215546Sopenharmony_ci V3DV_CMD_DIRTY_PUSH_CONSTANTS | 2570bf215546Sopenharmony_ci V3DV_CMD_DIRTY_DESCRIPTOR_SETS | 2571bf215546Sopenharmony_ci V3DV_CMD_DIRTY_VIEWPORT | 2572bf215546Sopenharmony_ci V3DV_CMD_DIRTY_VIEW_INDEX); 2573bf215546Sopenharmony_ci 2574bf215546Sopenharmony_ci if (dirty_uniform_state) 2575bf215546Sopenharmony_ci update_gfx_uniform_state(cmd_buffer, dirty_uniform_state); 2576bf215546Sopenharmony_ci 2577bf215546Sopenharmony_ci struct v3dv_device *device = cmd_buffer->device; 2578bf215546Sopenharmony_ci 2579bf215546Sopenharmony_ci if (dirty_uniform_state || (*dirty & V3DV_CMD_DIRTY_VERTEX_BUFFER)) 2580bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_gl_shader_state)(cmd_buffer); 2581bf215546Sopenharmony_ci 2582bf215546Sopenharmony_ci if (*dirty & (V3DV_CMD_DIRTY_PIPELINE)) { 2583bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_configuration_bits)(cmd_buffer); 2584bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_varyings_state)(cmd_buffer); 2585bf215546Sopenharmony_ci } 2586bf215546Sopenharmony_ci 2587bf215546Sopenharmony_ci if (*dirty & (V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR)) { 2588bf215546Sopenharmony_ci emit_scissor(cmd_buffer); 2589bf215546Sopenharmony_ci } 2590bf215546Sopenharmony_ci 2591bf215546Sopenharmony_ci if (*dirty & V3DV_CMD_DIRTY_VIEWPORT) { 2592bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_viewport)(cmd_buffer); 2593bf215546Sopenharmony_ci } 2594bf215546Sopenharmony_ci 2595bf215546Sopenharmony_ci if (*dirty & V3DV_CMD_DIRTY_INDEX_BUFFER) 2596bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_index_buffer)(cmd_buffer); 2597bf215546Sopenharmony_ci 2598bf215546Sopenharmony_ci const uint32_t dynamic_stencil_dirty_flags = 2599bf215546Sopenharmony_ci V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK | 2600bf215546Sopenharmony_ci V3DV_CMD_DIRTY_STENCIL_WRITE_MASK | 2601bf215546Sopenharmony_ci V3DV_CMD_DIRTY_STENCIL_REFERENCE; 2602bf215546Sopenharmony_ci if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | dynamic_stencil_dirty_flags)) 2603bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_stencil)(cmd_buffer); 2604bf215546Sopenharmony_ci 2605bf215546Sopenharmony_ci if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_DEPTH_BIAS)) 2606bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_depth_bias)(cmd_buffer); 2607bf215546Sopenharmony_ci 2608bf215546Sopenharmony_ci if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_BLEND_CONSTANTS)) 2609bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_blend)(cmd_buffer); 2610bf215546Sopenharmony_ci 2611bf215546Sopenharmony_ci if (*dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY) 2612bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_occlusion_query)(cmd_buffer); 2613bf215546Sopenharmony_ci 2614bf215546Sopenharmony_ci if (*dirty & V3DV_CMD_DIRTY_LINE_WIDTH) 2615bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_line_width)(cmd_buffer); 2616bf215546Sopenharmony_ci 2617bf215546Sopenharmony_ci if (*dirty & V3DV_CMD_DIRTY_PIPELINE) 2618bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_sample_state)(cmd_buffer); 2619bf215546Sopenharmony_ci 2620bf215546Sopenharmony_ci if (*dirty & (V3DV_CMD_DIRTY_PIPELINE | V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE)) 2621bf215546Sopenharmony_ci v3dv_X(device, cmd_buffer_emit_color_write_mask)(cmd_buffer); 2622bf215546Sopenharmony_ci 2623bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PIPELINE; 2624bf215546Sopenharmony_ci} 2625bf215546Sopenharmony_ci 2626bf215546Sopenharmony_cistatic inline void 2627bf215546Sopenharmony_cicmd_buffer_set_view_index(struct v3dv_cmd_buffer *cmd_buffer, 2628bf215546Sopenharmony_ci uint32_t view_index) 2629bf215546Sopenharmony_ci{ 2630bf215546Sopenharmony_ci cmd_buffer->state.view_index = view_index; 2631bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VIEW_INDEX; 2632bf215546Sopenharmony_ci} 2633bf215546Sopenharmony_ci 2634bf215546Sopenharmony_cistatic void 2635bf215546Sopenharmony_cicmd_buffer_draw(struct v3dv_cmd_buffer *cmd_buffer, 2636bf215546Sopenharmony_ci struct v3dv_draw_info *info) 2637bf215546Sopenharmony_ci{ 2638bf215546Sopenharmony_ci 2639bf215546Sopenharmony_ci struct v3dv_render_pass *pass = cmd_buffer->state.pass; 2640bf215546Sopenharmony_ci if (likely(!pass->multiview_enabled)) { 2641bf215546Sopenharmony_ci v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, false, false); 2642bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info); 2643bf215546Sopenharmony_ci return; 2644bf215546Sopenharmony_ci } 2645bf215546Sopenharmony_ci 2646bf215546Sopenharmony_ci uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; 2647bf215546Sopenharmony_ci while (view_mask) { 2648bf215546Sopenharmony_ci cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); 2649bf215546Sopenharmony_ci v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, false, false); 2650bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw)(cmd_buffer, info); 2651bf215546Sopenharmony_ci } 2652bf215546Sopenharmony_ci} 2653bf215546Sopenharmony_ci 2654bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2655bf215546Sopenharmony_civ3dv_CmdDraw(VkCommandBuffer commandBuffer, 2656bf215546Sopenharmony_ci uint32_t vertexCount, 2657bf215546Sopenharmony_ci uint32_t instanceCount, 2658bf215546Sopenharmony_ci uint32_t firstVertex, 2659bf215546Sopenharmony_ci uint32_t firstInstance) 2660bf215546Sopenharmony_ci{ 2661bf215546Sopenharmony_ci if (vertexCount == 0 || instanceCount == 0) 2662bf215546Sopenharmony_ci return; 2663bf215546Sopenharmony_ci 2664bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2665bf215546Sopenharmony_ci struct v3dv_draw_info info = {}; 2666bf215546Sopenharmony_ci info.vertex_count = vertexCount; 2667bf215546Sopenharmony_ci info.instance_count = instanceCount; 2668bf215546Sopenharmony_ci info.first_instance = firstInstance; 2669bf215546Sopenharmony_ci info.first_vertex = firstVertex; 2670bf215546Sopenharmony_ci 2671bf215546Sopenharmony_ci cmd_buffer_draw(cmd_buffer, &info); 2672bf215546Sopenharmony_ci} 2673bf215546Sopenharmony_ci 2674bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2675bf215546Sopenharmony_civ3dv_CmdDrawIndexed(VkCommandBuffer commandBuffer, 2676bf215546Sopenharmony_ci uint32_t indexCount, 2677bf215546Sopenharmony_ci uint32_t instanceCount, 2678bf215546Sopenharmony_ci uint32_t firstIndex, 2679bf215546Sopenharmony_ci int32_t vertexOffset, 2680bf215546Sopenharmony_ci uint32_t firstInstance) 2681bf215546Sopenharmony_ci{ 2682bf215546Sopenharmony_ci if (indexCount == 0 || instanceCount == 0) 2683bf215546Sopenharmony_ci return; 2684bf215546Sopenharmony_ci 2685bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2686bf215546Sopenharmony_ci 2687bf215546Sopenharmony_ci struct v3dv_render_pass *pass = cmd_buffer->state.pass; 2688bf215546Sopenharmony_ci if (likely(!pass->multiview_enabled)) { 2689bf215546Sopenharmony_ci v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, true, false); 2690bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed) 2691bf215546Sopenharmony_ci (cmd_buffer, indexCount, instanceCount, 2692bf215546Sopenharmony_ci firstIndex, vertexOffset, firstInstance); 2693bf215546Sopenharmony_ci return; 2694bf215546Sopenharmony_ci } 2695bf215546Sopenharmony_ci 2696bf215546Sopenharmony_ci uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; 2697bf215546Sopenharmony_ci while (view_mask) { 2698bf215546Sopenharmony_ci cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); 2699bf215546Sopenharmony_ci v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, true, false); 2700bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indexed) 2701bf215546Sopenharmony_ci (cmd_buffer, indexCount, instanceCount, 2702bf215546Sopenharmony_ci firstIndex, vertexOffset, firstInstance); 2703bf215546Sopenharmony_ci } 2704bf215546Sopenharmony_ci} 2705bf215546Sopenharmony_ci 2706bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2707bf215546Sopenharmony_civ3dv_CmdDrawIndirect(VkCommandBuffer commandBuffer, 2708bf215546Sopenharmony_ci VkBuffer _buffer, 2709bf215546Sopenharmony_ci VkDeviceSize offset, 2710bf215546Sopenharmony_ci uint32_t drawCount, 2711bf215546Sopenharmony_ci uint32_t stride) 2712bf215546Sopenharmony_ci{ 2713bf215546Sopenharmony_ci /* drawCount is the number of draws to execute, and can be zero. */ 2714bf215546Sopenharmony_ci if (drawCount == 0) 2715bf215546Sopenharmony_ci return; 2716bf215546Sopenharmony_ci 2717bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2718bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); 2719bf215546Sopenharmony_ci 2720bf215546Sopenharmony_ci struct v3dv_render_pass *pass = cmd_buffer->state.pass; 2721bf215546Sopenharmony_ci if (likely(!pass->multiview_enabled)) { 2722bf215546Sopenharmony_ci v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, false, true); 2723bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect) 2724bf215546Sopenharmony_ci (cmd_buffer, buffer, offset, drawCount, stride); 2725bf215546Sopenharmony_ci return; 2726bf215546Sopenharmony_ci } 2727bf215546Sopenharmony_ci 2728bf215546Sopenharmony_ci uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; 2729bf215546Sopenharmony_ci while (view_mask) { 2730bf215546Sopenharmony_ci cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); 2731bf215546Sopenharmony_ci v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, false, true); 2732bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_emit_draw_indirect) 2733bf215546Sopenharmony_ci (cmd_buffer, buffer, offset, drawCount, stride); 2734bf215546Sopenharmony_ci } 2735bf215546Sopenharmony_ci} 2736bf215546Sopenharmony_ci 2737bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2738bf215546Sopenharmony_civ3dv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, 2739bf215546Sopenharmony_ci VkBuffer _buffer, 2740bf215546Sopenharmony_ci VkDeviceSize offset, 2741bf215546Sopenharmony_ci uint32_t drawCount, 2742bf215546Sopenharmony_ci uint32_t stride) 2743bf215546Sopenharmony_ci{ 2744bf215546Sopenharmony_ci /* drawCount is the number of draws to execute, and can be zero. */ 2745bf215546Sopenharmony_ci if (drawCount == 0) 2746bf215546Sopenharmony_ci return; 2747bf215546Sopenharmony_ci 2748bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2749bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); 2750bf215546Sopenharmony_ci 2751bf215546Sopenharmony_ci struct v3dv_render_pass *pass = cmd_buffer->state.pass; 2752bf215546Sopenharmony_ci if (likely(!pass->multiview_enabled)) { 2753bf215546Sopenharmony_ci v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, true, true); 2754bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect) 2755bf215546Sopenharmony_ci (cmd_buffer, buffer, offset, drawCount, stride); 2756bf215546Sopenharmony_ci return; 2757bf215546Sopenharmony_ci } 2758bf215546Sopenharmony_ci 2759bf215546Sopenharmony_ci uint32_t view_mask = pass->subpasses[cmd_buffer->state.subpass_idx].view_mask; 2760bf215546Sopenharmony_ci while (view_mask) { 2761bf215546Sopenharmony_ci cmd_buffer_set_view_index(cmd_buffer, u_bit_scan(&view_mask)); 2762bf215546Sopenharmony_ci v3dv_cmd_buffer_emit_pre_draw(cmd_buffer, true, true); 2763bf215546Sopenharmony_ci v3dv_X(cmd_buffer->device, cmd_buffer_emit_indexed_indirect) 2764bf215546Sopenharmony_ci (cmd_buffer, buffer, offset, drawCount, stride); 2765bf215546Sopenharmony_ci } 2766bf215546Sopenharmony_ci} 2767bf215546Sopenharmony_ci 2768bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2769bf215546Sopenharmony_civ3dv_CmdPipelineBarrier(VkCommandBuffer commandBuffer, 2770bf215546Sopenharmony_ci VkPipelineStageFlags srcStageMask, 2771bf215546Sopenharmony_ci VkPipelineStageFlags dstStageMask, 2772bf215546Sopenharmony_ci VkDependencyFlags dependencyFlags, 2773bf215546Sopenharmony_ci uint32_t memoryBarrierCount, 2774bf215546Sopenharmony_ci const VkMemoryBarrier *pMemoryBarriers, 2775bf215546Sopenharmony_ci uint32_t bufferBarrierCount, 2776bf215546Sopenharmony_ci const VkBufferMemoryBarrier *pBufferBarriers, 2777bf215546Sopenharmony_ci uint32_t imageBarrierCount, 2778bf215546Sopenharmony_ci const VkImageMemoryBarrier *pImageBarriers) 2779bf215546Sopenharmony_ci{ 2780bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2781bf215546Sopenharmony_ci 2782bf215546Sopenharmony_ci /* We can safely skip barriers for image layout transitions from UNDEFINED 2783bf215546Sopenharmony_ci * layout. 2784bf215546Sopenharmony_ci */ 2785bf215546Sopenharmony_ci if (imageBarrierCount > 0) { 2786bf215546Sopenharmony_ci bool all_undefined = true; 2787bf215546Sopenharmony_ci for (int i = 0; all_undefined && i < imageBarrierCount; i++) { 2788bf215546Sopenharmony_ci if (pImageBarriers[i].oldLayout != VK_IMAGE_LAYOUT_UNDEFINED) 2789bf215546Sopenharmony_ci all_undefined = false; 2790bf215546Sopenharmony_ci } 2791bf215546Sopenharmony_ci if (all_undefined) 2792bf215546Sopenharmony_ci imageBarrierCount = 0; 2793bf215546Sopenharmony_ci } 2794bf215546Sopenharmony_ci 2795bf215546Sopenharmony_ci if (memoryBarrierCount + bufferBarrierCount + imageBarrierCount == 0) 2796bf215546Sopenharmony_ci return; 2797bf215546Sopenharmony_ci 2798bf215546Sopenharmony_ci /* We only care about barriers between GPU jobs */ 2799bf215546Sopenharmony_ci if (srcStageMask == VK_PIPELINE_STAGE_HOST_BIT || 2800bf215546Sopenharmony_ci dstStageMask == VK_PIPELINE_STAGE_HOST_BIT) { 2801bf215546Sopenharmony_ci return; 2802bf215546Sopenharmony_ci } 2803bf215546Sopenharmony_ci 2804bf215546Sopenharmony_ci /* If we have a recording job, finish it here */ 2805bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 2806bf215546Sopenharmony_ci if (job) 2807bf215546Sopenharmony_ci v3dv_cmd_buffer_finish_job(cmd_buffer); 2808bf215546Sopenharmony_ci 2809bf215546Sopenharmony_ci /* Track the source of the barrier */ 2810bf215546Sopenharmony_ci uint8_t src_mask = 0; 2811bf215546Sopenharmony_ci if (srcStageMask & (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | 2812bf215546Sopenharmony_ci VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { 2813bf215546Sopenharmony_ci src_mask |= V3DV_BARRIER_COMPUTE_BIT; 2814bf215546Sopenharmony_ci } 2815bf215546Sopenharmony_ci 2816bf215546Sopenharmony_ci if (srcStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | 2817bf215546Sopenharmony_ci VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { 2818bf215546Sopenharmony_ci src_mask |= V3DV_BARRIER_TRANSFER_BIT; 2819bf215546Sopenharmony_ci } 2820bf215546Sopenharmony_ci 2821bf215546Sopenharmony_ci if (srcStageMask & (~(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | 2822bf215546Sopenharmony_ci VK_PIPELINE_STAGE_TRANSFER_BIT))) { 2823bf215546Sopenharmony_ci src_mask |= V3DV_BARRIER_GRAPHICS_BIT; 2824bf215546Sopenharmony_ci } 2825bf215546Sopenharmony_ci 2826bf215546Sopenharmony_ci /* Track consumer of the barrier */ 2827bf215546Sopenharmony_ci if (dstStageMask & (VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | 2828bf215546Sopenharmony_ci VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { 2829bf215546Sopenharmony_ci cmd_buffer->state.barrier.dst_mask |= V3DV_BARRIER_COMPUTE_BIT; 2830bf215546Sopenharmony_ci cmd_buffer->state.barrier.src_mask_compute |= src_mask; 2831bf215546Sopenharmony_ci } 2832bf215546Sopenharmony_ci 2833bf215546Sopenharmony_ci if (dstStageMask & (VK_PIPELINE_STAGE_TRANSFER_BIT | 2834bf215546Sopenharmony_ci VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { 2835bf215546Sopenharmony_ci cmd_buffer->state.barrier.dst_mask |= V3DV_BARRIER_TRANSFER_BIT; 2836bf215546Sopenharmony_ci cmd_buffer->state.barrier.src_mask_transfer |= src_mask; 2837bf215546Sopenharmony_ci } 2838bf215546Sopenharmony_ci 2839bf215546Sopenharmony_ci if (dstStageMask & (~(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | 2840bf215546Sopenharmony_ci VK_PIPELINE_STAGE_TRANSFER_BIT))) { 2841bf215546Sopenharmony_ci cmd_buffer->state.barrier.dst_mask |= V3DV_BARRIER_GRAPHICS_BIT; 2842bf215546Sopenharmony_ci cmd_buffer->state.barrier.src_mask_graphics |= src_mask; 2843bf215546Sopenharmony_ci 2844bf215546Sopenharmony_ci if (dstStageMask & (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | 2845bf215546Sopenharmony_ci VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | 2846bf215546Sopenharmony_ci VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | 2847bf215546Sopenharmony_ci VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | 2848bf215546Sopenharmony_ci VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | 2849bf215546Sopenharmony_ci VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT | 2850bf215546Sopenharmony_ci VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | 2851bf215546Sopenharmony_ci VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)) { 2852bf215546Sopenharmony_ci for (int i = 0; i < memoryBarrierCount; i++) { 2853bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_buffer_access |= 2854bf215546Sopenharmony_ci pMemoryBarriers[i].dstAccessMask; 2855bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_image_access |= 2856bf215546Sopenharmony_ci pMemoryBarriers[i].dstAccessMask; 2857bf215546Sopenharmony_ci } 2858bf215546Sopenharmony_ci for (int i = 0; i < bufferBarrierCount; i++) { 2859bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_buffer_access |= 2860bf215546Sopenharmony_ci pBufferBarriers[i].dstAccessMask; 2861bf215546Sopenharmony_ci } 2862bf215546Sopenharmony_ci for (int i = 0; i < imageBarrierCount; i++) { 2863bf215546Sopenharmony_ci if (pImageBarriers[i].oldLayout != VK_IMAGE_LAYOUT_UNDEFINED) { 2864bf215546Sopenharmony_ci cmd_buffer->state.barrier.bcl_image_access |= 2865bf215546Sopenharmony_ci pImageBarriers[i].dstAccessMask; 2866bf215546Sopenharmony_ci } 2867bf215546Sopenharmony_ci } 2868bf215546Sopenharmony_ci } 2869bf215546Sopenharmony_ci } 2870bf215546Sopenharmony_ci} 2871bf215546Sopenharmony_ci 2872bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2873bf215546Sopenharmony_civ3dv_CmdBindVertexBuffers(VkCommandBuffer commandBuffer, 2874bf215546Sopenharmony_ci uint32_t firstBinding, 2875bf215546Sopenharmony_ci uint32_t bindingCount, 2876bf215546Sopenharmony_ci const VkBuffer *pBuffers, 2877bf215546Sopenharmony_ci const VkDeviceSize *pOffsets) 2878bf215546Sopenharmony_ci{ 2879bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2880bf215546Sopenharmony_ci struct v3dv_vertex_binding *vb = cmd_buffer->state.vertex_bindings; 2881bf215546Sopenharmony_ci 2882bf215546Sopenharmony_ci /* We have to defer setting up vertex buffer since we need the buffer 2883bf215546Sopenharmony_ci * stride from the pipeline. 2884bf215546Sopenharmony_ci */ 2885bf215546Sopenharmony_ci 2886bf215546Sopenharmony_ci assert(firstBinding + bindingCount <= MAX_VBS); 2887bf215546Sopenharmony_ci bool vb_state_changed = false; 2888bf215546Sopenharmony_ci for (uint32_t i = 0; i < bindingCount; i++) { 2889bf215546Sopenharmony_ci if (vb[firstBinding + i].buffer != v3dv_buffer_from_handle(pBuffers[i])) { 2890bf215546Sopenharmony_ci vb[firstBinding + i].buffer = v3dv_buffer_from_handle(pBuffers[i]); 2891bf215546Sopenharmony_ci vb_state_changed = true; 2892bf215546Sopenharmony_ci } 2893bf215546Sopenharmony_ci if (vb[firstBinding + i].offset != pOffsets[i]) { 2894bf215546Sopenharmony_ci vb[firstBinding + i].offset = pOffsets[i]; 2895bf215546Sopenharmony_ci vb_state_changed = true; 2896bf215546Sopenharmony_ci } 2897bf215546Sopenharmony_ci } 2898bf215546Sopenharmony_ci 2899bf215546Sopenharmony_ci if (vb_state_changed) 2900bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_VERTEX_BUFFER; 2901bf215546Sopenharmony_ci} 2902bf215546Sopenharmony_ci 2903bf215546Sopenharmony_cistatic uint32_t 2904bf215546Sopenharmony_ciget_index_size(VkIndexType index_type) 2905bf215546Sopenharmony_ci{ 2906bf215546Sopenharmony_ci switch (index_type) { 2907bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT8_EXT: 2908bf215546Sopenharmony_ci return 1; 2909bf215546Sopenharmony_ci break; 2910bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT16: 2911bf215546Sopenharmony_ci return 2; 2912bf215546Sopenharmony_ci break; 2913bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT32: 2914bf215546Sopenharmony_ci return 4; 2915bf215546Sopenharmony_ci break; 2916bf215546Sopenharmony_ci default: 2917bf215546Sopenharmony_ci unreachable("Unsupported index type"); 2918bf215546Sopenharmony_ci } 2919bf215546Sopenharmony_ci} 2920bf215546Sopenharmony_ci 2921bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2922bf215546Sopenharmony_civ3dv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, 2923bf215546Sopenharmony_ci VkBuffer buffer, 2924bf215546Sopenharmony_ci VkDeviceSize offset, 2925bf215546Sopenharmony_ci VkIndexType indexType) 2926bf215546Sopenharmony_ci{ 2927bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2928bf215546Sopenharmony_ci 2929bf215546Sopenharmony_ci const uint32_t index_size = get_index_size(indexType); 2930bf215546Sopenharmony_ci if (buffer == cmd_buffer->state.index_buffer.buffer && 2931bf215546Sopenharmony_ci offset == cmd_buffer->state.index_buffer.offset && 2932bf215546Sopenharmony_ci index_size == cmd_buffer->state.index_buffer.index_size) { 2933bf215546Sopenharmony_ci return; 2934bf215546Sopenharmony_ci } 2935bf215546Sopenharmony_ci 2936bf215546Sopenharmony_ci cmd_buffer->state.index_buffer.buffer = buffer; 2937bf215546Sopenharmony_ci cmd_buffer->state.index_buffer.offset = offset; 2938bf215546Sopenharmony_ci cmd_buffer->state.index_buffer.index_size = index_size; 2939bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_INDEX_BUFFER; 2940bf215546Sopenharmony_ci} 2941bf215546Sopenharmony_ci 2942bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2943bf215546Sopenharmony_civ3dv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, 2944bf215546Sopenharmony_ci VkStencilFaceFlags faceMask, 2945bf215546Sopenharmony_ci uint32_t compareMask) 2946bf215546Sopenharmony_ci{ 2947bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2948bf215546Sopenharmony_ci 2949bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 2950bf215546Sopenharmony_ci cmd_buffer->state.dynamic.stencil_compare_mask.front = compareMask & 0xff; 2951bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_BACK_BIT) 2952bf215546Sopenharmony_ci cmd_buffer->state.dynamic.stencil_compare_mask.back = compareMask & 0xff; 2953bf215546Sopenharmony_ci 2954bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK; 2955bf215546Sopenharmony_ci} 2956bf215546Sopenharmony_ci 2957bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2958bf215546Sopenharmony_civ3dv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, 2959bf215546Sopenharmony_ci VkStencilFaceFlags faceMask, 2960bf215546Sopenharmony_ci uint32_t writeMask) 2961bf215546Sopenharmony_ci{ 2962bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2963bf215546Sopenharmony_ci 2964bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 2965bf215546Sopenharmony_ci cmd_buffer->state.dynamic.stencil_write_mask.front = writeMask & 0xff; 2966bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_BACK_BIT) 2967bf215546Sopenharmony_ci cmd_buffer->state.dynamic.stencil_write_mask.back = writeMask & 0xff; 2968bf215546Sopenharmony_ci 2969bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_WRITE_MASK; 2970bf215546Sopenharmony_ci} 2971bf215546Sopenharmony_ci 2972bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2973bf215546Sopenharmony_civ3dv_CmdSetStencilReference(VkCommandBuffer commandBuffer, 2974bf215546Sopenharmony_ci VkStencilFaceFlags faceMask, 2975bf215546Sopenharmony_ci uint32_t reference) 2976bf215546Sopenharmony_ci{ 2977bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2978bf215546Sopenharmony_ci 2979bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_FRONT_BIT) 2980bf215546Sopenharmony_ci cmd_buffer->state.dynamic.stencil_reference.front = reference & 0xff; 2981bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_BACK_BIT) 2982bf215546Sopenharmony_ci cmd_buffer->state.dynamic.stencil_reference.back = reference & 0xff; 2983bf215546Sopenharmony_ci 2984bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_STENCIL_REFERENCE; 2985bf215546Sopenharmony_ci} 2986bf215546Sopenharmony_ci 2987bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2988bf215546Sopenharmony_civ3dv_CmdSetDepthBias(VkCommandBuffer commandBuffer, 2989bf215546Sopenharmony_ci float depthBiasConstantFactor, 2990bf215546Sopenharmony_ci float depthBiasClamp, 2991bf215546Sopenharmony_ci float depthBiasSlopeFactor) 2992bf215546Sopenharmony_ci{ 2993bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 2994bf215546Sopenharmony_ci 2995bf215546Sopenharmony_ci cmd_buffer->state.dynamic.depth_bias.constant_factor = depthBiasConstantFactor; 2996bf215546Sopenharmony_ci cmd_buffer->state.dynamic.depth_bias.depth_bias_clamp = depthBiasClamp; 2997bf215546Sopenharmony_ci cmd_buffer->state.dynamic.depth_bias.slope_factor = depthBiasSlopeFactor; 2998bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DEPTH_BIAS; 2999bf215546Sopenharmony_ci} 3000bf215546Sopenharmony_ci 3001bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3002bf215546Sopenharmony_civ3dv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, 3003bf215546Sopenharmony_ci float minDepthBounds, 3004bf215546Sopenharmony_ci float maxDepthBounds) 3005bf215546Sopenharmony_ci{ 3006bf215546Sopenharmony_ci /* We do not support depth bounds testing so we just ingore this. We are 3007bf215546Sopenharmony_ci * already asserting that pipelines don't enable the feature anyway. 3008bf215546Sopenharmony_ci */ 3009bf215546Sopenharmony_ci} 3010bf215546Sopenharmony_ci 3011bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3012bf215546Sopenharmony_civ3dv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, 3013bf215546Sopenharmony_ci uint32_t lineStippleFactor, 3014bf215546Sopenharmony_ci uint16_t lineStipplePattern) 3015bf215546Sopenharmony_ci{ 3016bf215546Sopenharmony_ci /* We do not support stippled line rasterization so we just ignore this. */ 3017bf215546Sopenharmony_ci} 3018bf215546Sopenharmony_ci 3019bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3020bf215546Sopenharmony_civ3dv_CmdSetLineWidth(VkCommandBuffer commandBuffer, 3021bf215546Sopenharmony_ci float lineWidth) 3022bf215546Sopenharmony_ci{ 3023bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3024bf215546Sopenharmony_ci 3025bf215546Sopenharmony_ci cmd_buffer->state.dynamic.line_width = lineWidth; 3026bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_LINE_WIDTH; 3027bf215546Sopenharmony_ci} 3028bf215546Sopenharmony_ci 3029bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3030bf215546Sopenharmony_civ3dv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, 3031bf215546Sopenharmony_ci VkPipelineBindPoint pipelineBindPoint, 3032bf215546Sopenharmony_ci VkPipelineLayout _layout, 3033bf215546Sopenharmony_ci uint32_t firstSet, 3034bf215546Sopenharmony_ci uint32_t descriptorSetCount, 3035bf215546Sopenharmony_ci const VkDescriptorSet *pDescriptorSets, 3036bf215546Sopenharmony_ci uint32_t dynamicOffsetCount, 3037bf215546Sopenharmony_ci const uint32_t *pDynamicOffsets) 3038bf215546Sopenharmony_ci{ 3039bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3040bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, _layout); 3041bf215546Sopenharmony_ci 3042bf215546Sopenharmony_ci uint32_t dyn_index = 0; 3043bf215546Sopenharmony_ci 3044bf215546Sopenharmony_ci assert(firstSet + descriptorSetCount <= MAX_SETS); 3045bf215546Sopenharmony_ci 3046bf215546Sopenharmony_ci struct v3dv_descriptor_state *descriptor_state = 3047bf215546Sopenharmony_ci pipelineBindPoint == VK_PIPELINE_BIND_POINT_COMPUTE ? 3048bf215546Sopenharmony_ci &cmd_buffer->state.compute.descriptor_state : 3049bf215546Sopenharmony_ci &cmd_buffer->state.gfx.descriptor_state; 3050bf215546Sopenharmony_ci 3051bf215546Sopenharmony_ci VkShaderStageFlags dirty_stages = 0; 3052bf215546Sopenharmony_ci bool descriptor_state_changed = false; 3053bf215546Sopenharmony_ci for (uint32_t i = 0; i < descriptorSetCount; i++) { 3054bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_descriptor_set, set, pDescriptorSets[i]); 3055bf215546Sopenharmony_ci uint32_t index = firstSet + i; 3056bf215546Sopenharmony_ci 3057bf215546Sopenharmony_ci descriptor_state->valid |= (1u << index); 3058bf215546Sopenharmony_ci if (descriptor_state->descriptor_sets[index] != set) { 3059bf215546Sopenharmony_ci descriptor_state->descriptor_sets[index] = set; 3060bf215546Sopenharmony_ci dirty_stages |= set->layout->shader_stages; 3061bf215546Sopenharmony_ci descriptor_state_changed = true; 3062bf215546Sopenharmony_ci } 3063bf215546Sopenharmony_ci 3064bf215546Sopenharmony_ci for (uint32_t j = 0; j < set->layout->dynamic_offset_count; j++, dyn_index++) { 3065bf215546Sopenharmony_ci uint32_t idx = j + layout->set[i + firstSet].dynamic_offset_start; 3066bf215546Sopenharmony_ci 3067bf215546Sopenharmony_ci if (descriptor_state->dynamic_offsets[idx] != pDynamicOffsets[dyn_index]) { 3068bf215546Sopenharmony_ci descriptor_state->dynamic_offsets[idx] = pDynamicOffsets[dyn_index]; 3069bf215546Sopenharmony_ci dirty_stages |= set->layout->shader_stages; 3070bf215546Sopenharmony_ci descriptor_state_changed = true; 3071bf215546Sopenharmony_ci } 3072bf215546Sopenharmony_ci } 3073bf215546Sopenharmony_ci } 3074bf215546Sopenharmony_ci 3075bf215546Sopenharmony_ci if (descriptor_state_changed) { 3076bf215546Sopenharmony_ci if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS) { 3077bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_DESCRIPTOR_SETS; 3078bf215546Sopenharmony_ci cmd_buffer->state.dirty_descriptor_stages |= dirty_stages & VK_SHADER_STAGE_ALL_GRAPHICS; 3079bf215546Sopenharmony_ci } else { 3080bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS; 3081bf215546Sopenharmony_ci cmd_buffer->state.dirty_descriptor_stages |= VK_SHADER_STAGE_COMPUTE_BIT; 3082bf215546Sopenharmony_ci } 3083bf215546Sopenharmony_ci } 3084bf215546Sopenharmony_ci} 3085bf215546Sopenharmony_ci 3086bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3087bf215546Sopenharmony_civ3dv_CmdPushConstants(VkCommandBuffer commandBuffer, 3088bf215546Sopenharmony_ci VkPipelineLayout layout, 3089bf215546Sopenharmony_ci VkShaderStageFlags stageFlags, 3090bf215546Sopenharmony_ci uint32_t offset, 3091bf215546Sopenharmony_ci uint32_t size, 3092bf215546Sopenharmony_ci const void *pValues) 3093bf215546Sopenharmony_ci{ 3094bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3095bf215546Sopenharmony_ci 3096bf215546Sopenharmony_ci if (!memcmp((uint8_t *) cmd_buffer->state.push_constants_data + offset, 3097bf215546Sopenharmony_ci pValues, size)) { 3098bf215546Sopenharmony_ci return; 3099bf215546Sopenharmony_ci } 3100bf215546Sopenharmony_ci 3101bf215546Sopenharmony_ci memcpy((uint8_t *) cmd_buffer->state.push_constants_data + offset, 3102bf215546Sopenharmony_ci pValues, size); 3103bf215546Sopenharmony_ci cmd_buffer->state.push_constants_size = 3104bf215546Sopenharmony_ci MAX2(offset + size, cmd_buffer->state.push_constants_size); 3105bf215546Sopenharmony_ci 3106bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_PUSH_CONSTANTS | 3107bf215546Sopenharmony_ci V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO; 3108bf215546Sopenharmony_ci cmd_buffer->state.dirty_push_constants_stages |= stageFlags; 3109bf215546Sopenharmony_ci} 3110bf215546Sopenharmony_ci 3111bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3112bf215546Sopenharmony_civ3dv_CmdSetBlendConstants(VkCommandBuffer commandBuffer, 3113bf215546Sopenharmony_ci const float blendConstants[4]) 3114bf215546Sopenharmony_ci{ 3115bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3116bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 3117bf215546Sopenharmony_ci 3118bf215546Sopenharmony_ci if (!memcmp(state->dynamic.blend_constants, blendConstants, 3119bf215546Sopenharmony_ci sizeof(state->dynamic.blend_constants))) { 3120bf215546Sopenharmony_ci return; 3121bf215546Sopenharmony_ci } 3122bf215546Sopenharmony_ci 3123bf215546Sopenharmony_ci memcpy(state->dynamic.blend_constants, blendConstants, 3124bf215546Sopenharmony_ci sizeof(state->dynamic.blend_constants)); 3125bf215546Sopenharmony_ci 3126bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_BLEND_CONSTANTS; 3127bf215546Sopenharmony_ci} 3128bf215546Sopenharmony_ci 3129bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3130bf215546Sopenharmony_civ3dv_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer, 3131bf215546Sopenharmony_ci uint32_t attachmentCount, 3132bf215546Sopenharmony_ci const VkBool32 *pColorWriteEnables) 3133bf215546Sopenharmony_ci{ 3134bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3135bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 3136bf215546Sopenharmony_ci uint32_t color_write_enable = 0; 3137bf215546Sopenharmony_ci 3138bf215546Sopenharmony_ci for (uint32_t i = 0; i < attachmentCount; i++) 3139bf215546Sopenharmony_ci color_write_enable |= pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0; 3140bf215546Sopenharmony_ci 3141bf215546Sopenharmony_ci if (state->dynamic.color_write_enable == color_write_enable) 3142bf215546Sopenharmony_ci return; 3143bf215546Sopenharmony_ci 3144bf215546Sopenharmony_ci state->dynamic.color_write_enable = color_write_enable; 3145bf215546Sopenharmony_ci 3146bf215546Sopenharmony_ci state->dirty |= V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; 3147bf215546Sopenharmony_ci} 3148bf215546Sopenharmony_ci 3149bf215546Sopenharmony_civoid 3150bf215546Sopenharmony_civ3dv_cmd_buffer_reset_queries(struct v3dv_cmd_buffer *cmd_buffer, 3151bf215546Sopenharmony_ci struct v3dv_query_pool *pool, 3152bf215546Sopenharmony_ci uint32_t first, 3153bf215546Sopenharmony_ci uint32_t count) 3154bf215546Sopenharmony_ci{ 3155bf215546Sopenharmony_ci /* Resets can only happen outside a render pass instance so we should not 3156bf215546Sopenharmony_ci * be in the middle of job recording. 3157bf215546Sopenharmony_ci */ 3158bf215546Sopenharmony_ci assert(cmd_buffer->state.pass == NULL); 3159bf215546Sopenharmony_ci assert(cmd_buffer->state.job == NULL); 3160bf215546Sopenharmony_ci 3161bf215546Sopenharmony_ci assert(first < pool->query_count); 3162bf215546Sopenharmony_ci assert(first + count <= pool->query_count); 3163bf215546Sopenharmony_ci 3164bf215546Sopenharmony_ci struct v3dv_job *job = 3165bf215546Sopenharmony_ci v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 3166bf215546Sopenharmony_ci V3DV_JOB_TYPE_CPU_RESET_QUERIES, 3167bf215546Sopenharmony_ci cmd_buffer, -1); 3168bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3169bf215546Sopenharmony_ci 3170bf215546Sopenharmony_ci job->cpu.query_reset.pool = pool; 3171bf215546Sopenharmony_ci job->cpu.query_reset.first = first; 3172bf215546Sopenharmony_ci job->cpu.query_reset.count = count; 3173bf215546Sopenharmony_ci 3174bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3175bf215546Sopenharmony_ci} 3176bf215546Sopenharmony_ci 3177bf215546Sopenharmony_civoid 3178bf215546Sopenharmony_civ3dv_cmd_buffer_ensure_array_state(struct v3dv_cmd_buffer *cmd_buffer, 3179bf215546Sopenharmony_ci uint32_t slot_size, 3180bf215546Sopenharmony_ci uint32_t used_count, 3181bf215546Sopenharmony_ci uint32_t *alloc_count, 3182bf215546Sopenharmony_ci void **ptr) 3183bf215546Sopenharmony_ci{ 3184bf215546Sopenharmony_ci if (used_count >= *alloc_count) { 3185bf215546Sopenharmony_ci const uint32_t prev_slot_count = *alloc_count; 3186bf215546Sopenharmony_ci void *old_buffer = *ptr; 3187bf215546Sopenharmony_ci 3188bf215546Sopenharmony_ci const uint32_t new_slot_count = MAX2(*alloc_count * 2, 4); 3189bf215546Sopenharmony_ci const uint32_t bytes = new_slot_count * slot_size; 3190bf215546Sopenharmony_ci *ptr = vk_alloc(&cmd_buffer->device->vk.alloc, bytes, 8, 3191bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 3192bf215546Sopenharmony_ci if (*ptr == NULL) { 3193bf215546Sopenharmony_ci fprintf(stderr, "Error: failed to allocate CPU buffer for query.\n"); 3194bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 3195bf215546Sopenharmony_ci return; 3196bf215546Sopenharmony_ci } 3197bf215546Sopenharmony_ci 3198bf215546Sopenharmony_ci memcpy(*ptr, old_buffer, prev_slot_count * slot_size); 3199bf215546Sopenharmony_ci *alloc_count = new_slot_count; 3200bf215546Sopenharmony_ci } 3201bf215546Sopenharmony_ci assert(used_count < *alloc_count); 3202bf215546Sopenharmony_ci} 3203bf215546Sopenharmony_ci 3204bf215546Sopenharmony_civoid 3205bf215546Sopenharmony_civ3dv_cmd_buffer_begin_query(struct v3dv_cmd_buffer *cmd_buffer, 3206bf215546Sopenharmony_ci struct v3dv_query_pool *pool, 3207bf215546Sopenharmony_ci uint32_t query, 3208bf215546Sopenharmony_ci VkQueryControlFlags flags) 3209bf215546Sopenharmony_ci{ 3210bf215546Sopenharmony_ci assert(query < pool->query_count); 3211bf215546Sopenharmony_ci switch (pool->query_type) { 3212bf215546Sopenharmony_ci case VK_QUERY_TYPE_OCCLUSION: 3213bf215546Sopenharmony_ci /* FIXME: we only support one active occlusion query for now */ 3214bf215546Sopenharmony_ci assert(cmd_buffer->state.query.active_query.bo == NULL); 3215bf215546Sopenharmony_ci 3216bf215546Sopenharmony_ci cmd_buffer->state.query.active_query.bo = pool->queries[query].bo; 3217bf215546Sopenharmony_ci cmd_buffer->state.query.active_query.offset = pool->queries[query].offset; 3218bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY; 3219bf215546Sopenharmony_ci break; 3220bf215546Sopenharmony_ci case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: { 3221bf215546Sopenharmony_ci assert(cmd_buffer->state.query.active_query.perf == NULL); 3222bf215546Sopenharmony_ci if (cmd_buffer->state.pass) 3223bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_finish(cmd_buffer); 3224bf215546Sopenharmony_ci 3225bf215546Sopenharmony_ci cmd_buffer->state.query.active_query.perf = 3226bf215546Sopenharmony_ci &pool->queries[query].perf; 3227bf215546Sopenharmony_ci 3228bf215546Sopenharmony_ci if (cmd_buffer->state.pass) { 3229bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_resume(cmd_buffer, 3230bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx); 3231bf215546Sopenharmony_ci } 3232bf215546Sopenharmony_ci break; 3233bf215546Sopenharmony_ci } 3234bf215546Sopenharmony_ci default: 3235bf215546Sopenharmony_ci unreachable("Unsupported query type"); 3236bf215546Sopenharmony_ci } 3237bf215546Sopenharmony_ci} 3238bf215546Sopenharmony_ci 3239bf215546Sopenharmony_cistatic void 3240bf215546Sopenharmony_civ3dv_cmd_buffer_schedule_end_query(struct v3dv_cmd_buffer *cmd_buffer, 3241bf215546Sopenharmony_ci struct v3dv_query_pool *pool, 3242bf215546Sopenharmony_ci uint32_t query) 3243bf215546Sopenharmony_ci{ 3244bf215546Sopenharmony_ci assert(query < pool->query_count); 3245bf215546Sopenharmony_ci 3246bf215546Sopenharmony_ci if (cmd_buffer->state.pass && 3247bf215546Sopenharmony_ci pool->query_type != VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR) { 3248bf215546Sopenharmony_ci /* Queue the EndQuery in the command buffer state, we will create a CPU 3249bf215546Sopenharmony_ci * job to flag all of these queries as possibly available right after the 3250bf215546Sopenharmony_ci * render pass job in which they have been recorded. 3251bf215546Sopenharmony_ci */ 3252bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 3253bf215546Sopenharmony_ci v3dv_cmd_buffer_ensure_array_state(cmd_buffer, 3254bf215546Sopenharmony_ci sizeof(struct v3dv_end_query_cpu_job_info), 3255bf215546Sopenharmony_ci state->query.end.used_count, 3256bf215546Sopenharmony_ci &state->query.end.alloc_count, 3257bf215546Sopenharmony_ci (void **) &state->query.end.states); 3258bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3259bf215546Sopenharmony_ci 3260bf215546Sopenharmony_ci struct v3dv_end_query_cpu_job_info *info = 3261bf215546Sopenharmony_ci &state->query.end.states[state->query.end.used_count++]; 3262bf215546Sopenharmony_ci 3263bf215546Sopenharmony_ci info->pool = pool; 3264bf215546Sopenharmony_ci info->query = query; 3265bf215546Sopenharmony_ci 3266bf215546Sopenharmony_ci /* From the Vulkan spec: 3267bf215546Sopenharmony_ci * 3268bf215546Sopenharmony_ci * "If queries are used while executing a render pass instance that has 3269bf215546Sopenharmony_ci * multiview enabled, the query uses N consecutive query indices in 3270bf215546Sopenharmony_ci * the query pool (starting at query) where N is the number of bits set 3271bf215546Sopenharmony_ci * in the view mask in the subpass the query is used in. How the 3272bf215546Sopenharmony_ci * numerical results of the query are distributed among the queries is 3273bf215546Sopenharmony_ci * implementation-dependent." 3274bf215546Sopenharmony_ci * 3275bf215546Sopenharmony_ci * In our case, only the first query is used but this means we still need 3276bf215546Sopenharmony_ci * to flag the other queries as available so we don't emit errors when 3277bf215546Sopenharmony_ci * the applications attempt to retrive values from them. 3278bf215546Sopenharmony_ci */ 3279bf215546Sopenharmony_ci struct v3dv_render_pass *pass = cmd_buffer->state.pass; 3280bf215546Sopenharmony_ci if (!pass->multiview_enabled) { 3281bf215546Sopenharmony_ci info->count = 1; 3282bf215546Sopenharmony_ci } else { 3283bf215546Sopenharmony_ci struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; 3284bf215546Sopenharmony_ci info->count = util_bitcount(subpass->view_mask); 3285bf215546Sopenharmony_ci } 3286bf215546Sopenharmony_ci } else { 3287bf215546Sopenharmony_ci /* Otherwise, schedule the CPU job immediately */ 3288bf215546Sopenharmony_ci struct v3dv_job *job = 3289bf215546Sopenharmony_ci v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 3290bf215546Sopenharmony_ci V3DV_JOB_TYPE_CPU_END_QUERY, 3291bf215546Sopenharmony_ci cmd_buffer, -1); 3292bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3293bf215546Sopenharmony_ci 3294bf215546Sopenharmony_ci job->cpu.query_end.pool = pool; 3295bf215546Sopenharmony_ci job->cpu.query_end.query = query; 3296bf215546Sopenharmony_ci 3297bf215546Sopenharmony_ci /* Multiview queries cannot cross subpass boundaries */ 3298bf215546Sopenharmony_ci job->cpu.query_end.count = 1; 3299bf215546Sopenharmony_ci 3300bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3301bf215546Sopenharmony_ci } 3302bf215546Sopenharmony_ci} 3303bf215546Sopenharmony_ci 3304bf215546Sopenharmony_cistatic void 3305bf215546Sopenharmony_civ3dv_cmd_buffer_end_occlusion_query(struct v3dv_cmd_buffer *cmd_buffer, 3306bf215546Sopenharmony_ci struct v3dv_query_pool *pool, 3307bf215546Sopenharmony_ci uint32_t query) 3308bf215546Sopenharmony_ci{ 3309bf215546Sopenharmony_ci assert(query < pool->query_count); 3310bf215546Sopenharmony_ci assert(cmd_buffer->state.query.active_query.bo != NULL); 3311bf215546Sopenharmony_ci 3312bf215546Sopenharmony_ci v3dv_cmd_buffer_schedule_end_query(cmd_buffer, pool, query); 3313bf215546Sopenharmony_ci 3314bf215546Sopenharmony_ci cmd_buffer->state.query.active_query.bo = NULL; 3315bf215546Sopenharmony_ci cmd_buffer->state.dirty |= V3DV_CMD_DIRTY_OCCLUSION_QUERY; 3316bf215546Sopenharmony_ci} 3317bf215546Sopenharmony_ci 3318bf215546Sopenharmony_cistatic void 3319bf215546Sopenharmony_civ3dv_cmd_buffer_end_performance_query(struct v3dv_cmd_buffer *cmd_buffer, 3320bf215546Sopenharmony_ci struct v3dv_query_pool *pool, 3321bf215546Sopenharmony_ci uint32_t query) 3322bf215546Sopenharmony_ci{ 3323bf215546Sopenharmony_ci assert(query < pool->query_count); 3324bf215546Sopenharmony_ci assert(cmd_buffer->state.query.active_query.perf != NULL); 3325bf215546Sopenharmony_ci 3326bf215546Sopenharmony_ci if (cmd_buffer->state.pass) 3327bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_finish(cmd_buffer); 3328bf215546Sopenharmony_ci 3329bf215546Sopenharmony_ci v3dv_cmd_buffer_schedule_end_query(cmd_buffer, pool, query); 3330bf215546Sopenharmony_ci 3331bf215546Sopenharmony_ci cmd_buffer->state.query.active_query.perf = NULL; 3332bf215546Sopenharmony_ci 3333bf215546Sopenharmony_ci if (cmd_buffer->state.pass) 3334bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx); 3335bf215546Sopenharmony_ci} 3336bf215546Sopenharmony_ci 3337bf215546Sopenharmony_civoid v3dv_cmd_buffer_end_query(struct v3dv_cmd_buffer *cmd_buffer, 3338bf215546Sopenharmony_ci struct v3dv_query_pool *pool, 3339bf215546Sopenharmony_ci uint32_t query) 3340bf215546Sopenharmony_ci{ 3341bf215546Sopenharmony_ci switch (pool->query_type) { 3342bf215546Sopenharmony_ci case VK_QUERY_TYPE_OCCLUSION: 3343bf215546Sopenharmony_ci v3dv_cmd_buffer_end_occlusion_query(cmd_buffer, pool, query); 3344bf215546Sopenharmony_ci break; 3345bf215546Sopenharmony_ci case VK_QUERY_TYPE_PERFORMANCE_QUERY_KHR: 3346bf215546Sopenharmony_ci v3dv_cmd_buffer_end_performance_query(cmd_buffer, pool, query); 3347bf215546Sopenharmony_ci break; 3348bf215546Sopenharmony_ci default: 3349bf215546Sopenharmony_ci unreachable("Unsupported query type"); 3350bf215546Sopenharmony_ci } 3351bf215546Sopenharmony_ci} 3352bf215546Sopenharmony_ci 3353bf215546Sopenharmony_civoid 3354bf215546Sopenharmony_civ3dv_cmd_buffer_copy_query_results(struct v3dv_cmd_buffer *cmd_buffer, 3355bf215546Sopenharmony_ci struct v3dv_query_pool *pool, 3356bf215546Sopenharmony_ci uint32_t first, 3357bf215546Sopenharmony_ci uint32_t count, 3358bf215546Sopenharmony_ci struct v3dv_buffer *dst, 3359bf215546Sopenharmony_ci uint32_t offset, 3360bf215546Sopenharmony_ci uint32_t stride, 3361bf215546Sopenharmony_ci VkQueryResultFlags flags) 3362bf215546Sopenharmony_ci{ 3363bf215546Sopenharmony_ci /* Copies can only happen outside a render pass instance so we should not 3364bf215546Sopenharmony_ci * be in the middle of job recording. 3365bf215546Sopenharmony_ci */ 3366bf215546Sopenharmony_ci assert(cmd_buffer->state.pass == NULL); 3367bf215546Sopenharmony_ci assert(cmd_buffer->state.job == NULL); 3368bf215546Sopenharmony_ci 3369bf215546Sopenharmony_ci assert(first < pool->query_count); 3370bf215546Sopenharmony_ci assert(first + count <= pool->query_count); 3371bf215546Sopenharmony_ci 3372bf215546Sopenharmony_ci struct v3dv_job *job = 3373bf215546Sopenharmony_ci v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 3374bf215546Sopenharmony_ci V3DV_JOB_TYPE_CPU_COPY_QUERY_RESULTS, 3375bf215546Sopenharmony_ci cmd_buffer, -1); 3376bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3377bf215546Sopenharmony_ci 3378bf215546Sopenharmony_ci job->cpu.query_copy_results.pool = pool; 3379bf215546Sopenharmony_ci job->cpu.query_copy_results.first = first; 3380bf215546Sopenharmony_ci job->cpu.query_copy_results.count = count; 3381bf215546Sopenharmony_ci job->cpu.query_copy_results.dst = dst; 3382bf215546Sopenharmony_ci job->cpu.query_copy_results.offset = offset; 3383bf215546Sopenharmony_ci job->cpu.query_copy_results.stride = stride; 3384bf215546Sopenharmony_ci job->cpu.query_copy_results.flags = flags; 3385bf215546Sopenharmony_ci 3386bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3387bf215546Sopenharmony_ci} 3388bf215546Sopenharmony_ci 3389bf215546Sopenharmony_civoid 3390bf215546Sopenharmony_civ3dv_cmd_buffer_add_tfu_job(struct v3dv_cmd_buffer *cmd_buffer, 3391bf215546Sopenharmony_ci struct drm_v3d_submit_tfu *tfu) 3392bf215546Sopenharmony_ci{ 3393bf215546Sopenharmony_ci struct v3dv_device *device = cmd_buffer->device; 3394bf215546Sopenharmony_ci struct v3dv_job *job = vk_zalloc(&device->vk.alloc, 3395bf215546Sopenharmony_ci sizeof(struct v3dv_job), 8, 3396bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 3397bf215546Sopenharmony_ci if (!job) { 3398bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 3399bf215546Sopenharmony_ci return; 3400bf215546Sopenharmony_ci } 3401bf215546Sopenharmony_ci 3402bf215546Sopenharmony_ci v3dv_job_init(job, V3DV_JOB_TYPE_GPU_TFU, device, cmd_buffer, -1); 3403bf215546Sopenharmony_ci job->tfu = *tfu; 3404bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3405bf215546Sopenharmony_ci} 3406bf215546Sopenharmony_ci 3407bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3408bf215546Sopenharmony_civ3dv_CmdSetEvent(VkCommandBuffer commandBuffer, 3409bf215546Sopenharmony_ci VkEvent _event, 3410bf215546Sopenharmony_ci VkPipelineStageFlags stageMask) 3411bf215546Sopenharmony_ci{ 3412bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3413bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_event, event, _event); 3414bf215546Sopenharmony_ci 3415bf215546Sopenharmony_ci /* Event (re)sets can only happen outside a render pass instance so we 3416bf215546Sopenharmony_ci * should not be in the middle of job recording. 3417bf215546Sopenharmony_ci */ 3418bf215546Sopenharmony_ci assert(cmd_buffer->state.pass == NULL); 3419bf215546Sopenharmony_ci assert(cmd_buffer->state.job == NULL); 3420bf215546Sopenharmony_ci 3421bf215546Sopenharmony_ci struct v3dv_job *job = 3422bf215546Sopenharmony_ci v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 3423bf215546Sopenharmony_ci V3DV_JOB_TYPE_CPU_SET_EVENT, 3424bf215546Sopenharmony_ci cmd_buffer, -1); 3425bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3426bf215546Sopenharmony_ci 3427bf215546Sopenharmony_ci job->cpu.event_set.event = event; 3428bf215546Sopenharmony_ci job->cpu.event_set.state = 1; 3429bf215546Sopenharmony_ci 3430bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3431bf215546Sopenharmony_ci} 3432bf215546Sopenharmony_ci 3433bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3434bf215546Sopenharmony_civ3dv_CmdResetEvent(VkCommandBuffer commandBuffer, 3435bf215546Sopenharmony_ci VkEvent _event, 3436bf215546Sopenharmony_ci VkPipelineStageFlags stageMask) 3437bf215546Sopenharmony_ci{ 3438bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3439bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_event, event, _event); 3440bf215546Sopenharmony_ci 3441bf215546Sopenharmony_ci /* Event (re)sets can only happen outside a render pass instance so we 3442bf215546Sopenharmony_ci * should not be in the middle of job recording. 3443bf215546Sopenharmony_ci */ 3444bf215546Sopenharmony_ci assert(cmd_buffer->state.pass == NULL); 3445bf215546Sopenharmony_ci assert(cmd_buffer->state.job == NULL); 3446bf215546Sopenharmony_ci 3447bf215546Sopenharmony_ci struct v3dv_job *job = 3448bf215546Sopenharmony_ci v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 3449bf215546Sopenharmony_ci V3DV_JOB_TYPE_CPU_SET_EVENT, 3450bf215546Sopenharmony_ci cmd_buffer, -1); 3451bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3452bf215546Sopenharmony_ci 3453bf215546Sopenharmony_ci job->cpu.event_set.event = event; 3454bf215546Sopenharmony_ci job->cpu.event_set.state = 0; 3455bf215546Sopenharmony_ci 3456bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3457bf215546Sopenharmony_ci} 3458bf215546Sopenharmony_ci 3459bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3460bf215546Sopenharmony_civ3dv_CmdWaitEvents(VkCommandBuffer commandBuffer, 3461bf215546Sopenharmony_ci uint32_t eventCount, 3462bf215546Sopenharmony_ci const VkEvent *pEvents, 3463bf215546Sopenharmony_ci VkPipelineStageFlags srcStageMask, 3464bf215546Sopenharmony_ci VkPipelineStageFlags dstStageMask, 3465bf215546Sopenharmony_ci uint32_t memoryBarrierCount, 3466bf215546Sopenharmony_ci const VkMemoryBarrier *pMemoryBarriers, 3467bf215546Sopenharmony_ci uint32_t bufferMemoryBarrierCount, 3468bf215546Sopenharmony_ci const VkBufferMemoryBarrier *pBufferMemoryBarriers, 3469bf215546Sopenharmony_ci uint32_t imageMemoryBarrierCount, 3470bf215546Sopenharmony_ci const VkImageMemoryBarrier *pImageMemoryBarriers) 3471bf215546Sopenharmony_ci{ 3472bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3473bf215546Sopenharmony_ci 3474bf215546Sopenharmony_ci assert(eventCount > 0); 3475bf215546Sopenharmony_ci 3476bf215546Sopenharmony_ci struct v3dv_job *job = 3477bf215546Sopenharmony_ci v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 3478bf215546Sopenharmony_ci V3DV_JOB_TYPE_CPU_WAIT_EVENTS, 3479bf215546Sopenharmony_ci cmd_buffer, -1); 3480bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3481bf215546Sopenharmony_ci 3482bf215546Sopenharmony_ci const uint32_t event_list_size = sizeof(struct v3dv_event *) * eventCount; 3483bf215546Sopenharmony_ci 3484bf215546Sopenharmony_ci job->cpu.event_wait.events = 3485bf215546Sopenharmony_ci vk_alloc(&cmd_buffer->device->vk.alloc, event_list_size, 8, 3486bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 3487bf215546Sopenharmony_ci if (!job->cpu.event_wait.events) { 3488bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 3489bf215546Sopenharmony_ci return; 3490bf215546Sopenharmony_ci } 3491bf215546Sopenharmony_ci job->cpu.event_wait.event_count = eventCount; 3492bf215546Sopenharmony_ci 3493bf215546Sopenharmony_ci for (uint32_t i = 0; i < eventCount; i++) 3494bf215546Sopenharmony_ci job->cpu.event_wait.events[i] = v3dv_event_from_handle(pEvents[i]); 3495bf215546Sopenharmony_ci 3496bf215546Sopenharmony_ci /* vkCmdWaitEvents can be recorded inside a render pass, so we might have 3497bf215546Sopenharmony_ci * an active job. 3498bf215546Sopenharmony_ci * 3499bf215546Sopenharmony_ci * If we are inside a render pass, because we vkCmd(Re)SetEvent can't happen 3500bf215546Sopenharmony_ci * inside a render pass, it is safe to move the wait job so it happens right 3501bf215546Sopenharmony_ci * before the current job we are currently recording for the subpass, if any 3502bf215546Sopenharmony_ci * (it would actually be safe to move it all the way back to right before 3503bf215546Sopenharmony_ci * the start of the render pass). 3504bf215546Sopenharmony_ci * 3505bf215546Sopenharmony_ci * If we are outside a render pass then we should not have any on-going job 3506bf215546Sopenharmony_ci * and we are free to just add the wait job without restrictions. 3507bf215546Sopenharmony_ci */ 3508bf215546Sopenharmony_ci assert(cmd_buffer->state.pass || !cmd_buffer->state.job); 3509bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3510bf215546Sopenharmony_ci} 3511bf215546Sopenharmony_ci 3512bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3513bf215546Sopenharmony_civ3dv_CmdWriteTimestamp(VkCommandBuffer commandBuffer, 3514bf215546Sopenharmony_ci VkPipelineStageFlagBits pipelineStage, 3515bf215546Sopenharmony_ci VkQueryPool queryPool, 3516bf215546Sopenharmony_ci uint32_t query) 3517bf215546Sopenharmony_ci{ 3518bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3519bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_query_pool, query_pool, queryPool); 3520bf215546Sopenharmony_ci 3521bf215546Sopenharmony_ci /* If this is called inside a render pass we need to finish the current 3522bf215546Sopenharmony_ci * job here... 3523bf215546Sopenharmony_ci */ 3524bf215546Sopenharmony_ci struct v3dv_render_pass *pass = cmd_buffer->state.pass; 3525bf215546Sopenharmony_ci if (pass) 3526bf215546Sopenharmony_ci v3dv_cmd_buffer_finish_job(cmd_buffer); 3527bf215546Sopenharmony_ci 3528bf215546Sopenharmony_ci struct v3dv_job *job = 3529bf215546Sopenharmony_ci v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 3530bf215546Sopenharmony_ci V3DV_JOB_TYPE_CPU_TIMESTAMP_QUERY, 3531bf215546Sopenharmony_ci cmd_buffer, -1); 3532bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3533bf215546Sopenharmony_ci 3534bf215546Sopenharmony_ci job->cpu.query_timestamp.pool = query_pool; 3535bf215546Sopenharmony_ci job->cpu.query_timestamp.query = query; 3536bf215546Sopenharmony_ci 3537bf215546Sopenharmony_ci if (!pass || !pass->multiview_enabled) { 3538bf215546Sopenharmony_ci job->cpu.query_timestamp.count = 1; 3539bf215546Sopenharmony_ci } else { 3540bf215546Sopenharmony_ci struct v3dv_subpass *subpass = 3541bf215546Sopenharmony_ci &pass->subpasses[cmd_buffer->state.subpass_idx]; 3542bf215546Sopenharmony_ci job->cpu.query_timestamp.count = util_bitcount(subpass->view_mask); 3543bf215546Sopenharmony_ci } 3544bf215546Sopenharmony_ci 3545bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3546bf215546Sopenharmony_ci cmd_buffer->state.job = NULL; 3547bf215546Sopenharmony_ci 3548bf215546Sopenharmony_ci /* ...and resume the subpass after the timestamp */ 3549bf215546Sopenharmony_ci if (cmd_buffer->state.pass) 3550bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx); 3551bf215546Sopenharmony_ci} 3552bf215546Sopenharmony_ci 3553bf215546Sopenharmony_cistatic void 3554bf215546Sopenharmony_cicmd_buffer_emit_pre_dispatch(struct v3dv_cmd_buffer *cmd_buffer) 3555bf215546Sopenharmony_ci{ 3556bf215546Sopenharmony_ci assert(cmd_buffer->state.compute.pipeline); 3557bf215546Sopenharmony_ci assert(cmd_buffer->state.compute.pipeline->active_stages == 3558bf215546Sopenharmony_ci VK_SHADER_STAGE_COMPUTE_BIT); 3559bf215546Sopenharmony_ci 3560bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_COMPUTE_PIPELINE | 3561bf215546Sopenharmony_ci V3DV_CMD_DIRTY_COMPUTE_DESCRIPTOR_SETS); 3562bf215546Sopenharmony_ci cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT; 3563bf215546Sopenharmony_ci cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_COMPUTE_BIT; 3564bf215546Sopenharmony_ci} 3565bf215546Sopenharmony_ci 3566bf215546Sopenharmony_ci#define V3D_CSD_CFG012_WG_COUNT_SHIFT 16 3567bf215546Sopenharmony_ci#define V3D_CSD_CFG012_WG_OFFSET_SHIFT 0 3568bf215546Sopenharmony_ci/* Allow this dispatch to start while the last one is still running. */ 3569bf215546Sopenharmony_ci#define V3D_CSD_CFG3_OVERLAP_WITH_PREV (1 << 26) 3570bf215546Sopenharmony_ci/* Maximum supergroup ID. 6 bits. */ 3571bf215546Sopenharmony_ci#define V3D_CSD_CFG3_MAX_SG_ID_SHIFT 20 3572bf215546Sopenharmony_ci/* Batches per supergroup minus 1. 8 bits. */ 3573bf215546Sopenharmony_ci#define V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT 12 3574bf215546Sopenharmony_ci/* Workgroups per supergroup, 0 means 16 */ 3575bf215546Sopenharmony_ci#define V3D_CSD_CFG3_WGS_PER_SG_SHIFT 8 3576bf215546Sopenharmony_ci#define V3D_CSD_CFG3_WG_SIZE_SHIFT 0 3577bf215546Sopenharmony_ci 3578bf215546Sopenharmony_ci#define V3D_CSD_CFG5_PROPAGATE_NANS (1 << 2) 3579bf215546Sopenharmony_ci#define V3D_CSD_CFG5_SINGLE_SEG (1 << 1) 3580bf215546Sopenharmony_ci#define V3D_CSD_CFG5_THREADING (1 << 0) 3581bf215546Sopenharmony_ci 3582bf215546Sopenharmony_civoid 3583bf215546Sopenharmony_civ3dv_cmd_buffer_rewrite_indirect_csd_job( 3584bf215546Sopenharmony_ci struct v3dv_csd_indirect_cpu_job_info *info, 3585bf215546Sopenharmony_ci const uint32_t *wg_counts) 3586bf215546Sopenharmony_ci{ 3587bf215546Sopenharmony_ci assert(info->csd_job); 3588bf215546Sopenharmony_ci struct v3dv_job *job = info->csd_job; 3589bf215546Sopenharmony_ci 3590bf215546Sopenharmony_ci assert(job->type == V3DV_JOB_TYPE_GPU_CSD); 3591bf215546Sopenharmony_ci assert(wg_counts[0] > 0 && wg_counts[1] > 0 && wg_counts[2] > 0); 3592bf215546Sopenharmony_ci 3593bf215546Sopenharmony_ci struct drm_v3d_submit_csd *submit = &job->csd.submit; 3594bf215546Sopenharmony_ci 3595bf215546Sopenharmony_ci job->csd.wg_count[0] = wg_counts[0]; 3596bf215546Sopenharmony_ci job->csd.wg_count[1] = wg_counts[1]; 3597bf215546Sopenharmony_ci job->csd.wg_count[2] = wg_counts[2]; 3598bf215546Sopenharmony_ci 3599bf215546Sopenharmony_ci submit->cfg[0] = wg_counts[0] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 3600bf215546Sopenharmony_ci submit->cfg[1] = wg_counts[1] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 3601bf215546Sopenharmony_ci submit->cfg[2] = wg_counts[2] << V3D_CSD_CFG012_WG_COUNT_SHIFT; 3602bf215546Sopenharmony_ci 3603bf215546Sopenharmony_ci submit->cfg[4] = DIV_ROUND_UP(info->wg_size, 16) * 3604bf215546Sopenharmony_ci (wg_counts[0] * wg_counts[1] * wg_counts[2]) - 1; 3605bf215546Sopenharmony_ci assert(submit->cfg[4] != ~0); 3606bf215546Sopenharmony_ci 3607bf215546Sopenharmony_ci if (info->needs_wg_uniform_rewrite) { 3608bf215546Sopenharmony_ci /* Make sure the GPU is not currently accessing the indirect CL for this 3609bf215546Sopenharmony_ci * job, since we are about to overwrite some of the uniform data. 3610bf215546Sopenharmony_ci */ 3611bf215546Sopenharmony_ci v3dv_bo_wait(job->device, job->indirect.bo, PIPE_TIMEOUT_INFINITE); 3612bf215546Sopenharmony_ci 3613bf215546Sopenharmony_ci for (uint32_t i = 0; i < 3; i++) { 3614bf215546Sopenharmony_ci if (info->wg_uniform_offsets[i]) { 3615bf215546Sopenharmony_ci /* Sanity check that our uniform pointers are within the allocated 3616bf215546Sopenharmony_ci * BO space for our indirect CL. 3617bf215546Sopenharmony_ci */ 3618bf215546Sopenharmony_ci assert(info->wg_uniform_offsets[i] >= (uint32_t *) job->indirect.base); 3619bf215546Sopenharmony_ci assert(info->wg_uniform_offsets[i] < (uint32_t *) job->indirect.next); 3620bf215546Sopenharmony_ci *(info->wg_uniform_offsets[i]) = wg_counts[i]; 3621bf215546Sopenharmony_ci } 3622bf215546Sopenharmony_ci } 3623bf215546Sopenharmony_ci } 3624bf215546Sopenharmony_ci} 3625bf215546Sopenharmony_ci 3626bf215546Sopenharmony_cistatic struct v3dv_job * 3627bf215546Sopenharmony_cicmd_buffer_create_csd_job(struct v3dv_cmd_buffer *cmd_buffer, 3628bf215546Sopenharmony_ci uint32_t base_offset_x, 3629bf215546Sopenharmony_ci uint32_t base_offset_y, 3630bf215546Sopenharmony_ci uint32_t base_offset_z, 3631bf215546Sopenharmony_ci uint32_t group_count_x, 3632bf215546Sopenharmony_ci uint32_t group_count_y, 3633bf215546Sopenharmony_ci uint32_t group_count_z, 3634bf215546Sopenharmony_ci uint32_t **wg_uniform_offsets_out, 3635bf215546Sopenharmony_ci uint32_t *wg_size_out) 3636bf215546Sopenharmony_ci{ 3637bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = cmd_buffer->state.compute.pipeline; 3638bf215546Sopenharmony_ci assert(pipeline && pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]); 3639bf215546Sopenharmony_ci struct v3dv_shader_variant *cs_variant = 3640bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]; 3641bf215546Sopenharmony_ci 3642bf215546Sopenharmony_ci struct v3dv_job *job = vk_zalloc(&cmd_buffer->device->vk.alloc, 3643bf215546Sopenharmony_ci sizeof(struct v3dv_job), 8, 3644bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 3645bf215546Sopenharmony_ci if (!job) { 3646bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 3647bf215546Sopenharmony_ci return NULL; 3648bf215546Sopenharmony_ci } 3649bf215546Sopenharmony_ci 3650bf215546Sopenharmony_ci v3dv_job_init(job, V3DV_JOB_TYPE_GPU_CSD, cmd_buffer->device, cmd_buffer, -1); 3651bf215546Sopenharmony_ci cmd_buffer->state.job = job; 3652bf215546Sopenharmony_ci 3653bf215546Sopenharmony_ci struct drm_v3d_submit_csd *submit = &job->csd.submit; 3654bf215546Sopenharmony_ci 3655bf215546Sopenharmony_ci job->csd.wg_count[0] = group_count_x; 3656bf215546Sopenharmony_ci job->csd.wg_count[1] = group_count_y; 3657bf215546Sopenharmony_ci job->csd.wg_count[2] = group_count_z; 3658bf215546Sopenharmony_ci 3659bf215546Sopenharmony_ci job->csd.wg_base[0] = base_offset_x; 3660bf215546Sopenharmony_ci job->csd.wg_base[1] = base_offset_y; 3661bf215546Sopenharmony_ci job->csd.wg_base[2] = base_offset_z; 3662bf215546Sopenharmony_ci 3663bf215546Sopenharmony_ci submit->cfg[0] |= group_count_x << V3D_CSD_CFG012_WG_COUNT_SHIFT; 3664bf215546Sopenharmony_ci submit->cfg[1] |= group_count_y << V3D_CSD_CFG012_WG_COUNT_SHIFT; 3665bf215546Sopenharmony_ci submit->cfg[2] |= group_count_z << V3D_CSD_CFG012_WG_COUNT_SHIFT; 3666bf215546Sopenharmony_ci 3667bf215546Sopenharmony_ci const struct v3d_compute_prog_data *cpd = 3668bf215546Sopenharmony_ci cs_variant->prog_data.cs; 3669bf215546Sopenharmony_ci 3670bf215546Sopenharmony_ci const uint32_t num_wgs = group_count_x * group_count_y * group_count_z; 3671bf215546Sopenharmony_ci const uint32_t wg_size = cpd->local_size[0] * 3672bf215546Sopenharmony_ci cpd->local_size[1] * 3673bf215546Sopenharmony_ci cpd->local_size[2]; 3674bf215546Sopenharmony_ci 3675bf215546Sopenharmony_ci uint32_t wgs_per_sg = 3676bf215546Sopenharmony_ci v3d_csd_choose_workgroups_per_supergroup( 3677bf215546Sopenharmony_ci &cmd_buffer->device->devinfo, 3678bf215546Sopenharmony_ci cs_variant->prog_data.cs->has_subgroups, 3679bf215546Sopenharmony_ci cs_variant->prog_data.cs->base.has_control_barrier, 3680bf215546Sopenharmony_ci cs_variant->prog_data.cs->base.threads, 3681bf215546Sopenharmony_ci num_wgs, wg_size); 3682bf215546Sopenharmony_ci 3683bf215546Sopenharmony_ci uint32_t batches_per_sg = DIV_ROUND_UP(wgs_per_sg * wg_size, 16); 3684bf215546Sopenharmony_ci uint32_t whole_sgs = num_wgs / wgs_per_sg; 3685bf215546Sopenharmony_ci uint32_t rem_wgs = num_wgs - whole_sgs * wgs_per_sg; 3686bf215546Sopenharmony_ci uint32_t num_batches = batches_per_sg * whole_sgs + 3687bf215546Sopenharmony_ci DIV_ROUND_UP(rem_wgs * wg_size, 16); 3688bf215546Sopenharmony_ci 3689bf215546Sopenharmony_ci submit->cfg[3] |= (wgs_per_sg & 0xf) << V3D_CSD_CFG3_WGS_PER_SG_SHIFT; 3690bf215546Sopenharmony_ci submit->cfg[3] |= (batches_per_sg - 1) << V3D_CSD_CFG3_BATCHES_PER_SG_M1_SHIFT; 3691bf215546Sopenharmony_ci submit->cfg[3] |= (wg_size & 0xff) << V3D_CSD_CFG3_WG_SIZE_SHIFT; 3692bf215546Sopenharmony_ci if (wg_size_out) 3693bf215546Sopenharmony_ci *wg_size_out = wg_size; 3694bf215546Sopenharmony_ci 3695bf215546Sopenharmony_ci submit->cfg[4] = num_batches - 1; 3696bf215546Sopenharmony_ci assert(submit->cfg[4] != ~0); 3697bf215546Sopenharmony_ci 3698bf215546Sopenharmony_ci assert(pipeline->shared_data->assembly_bo); 3699bf215546Sopenharmony_ci struct v3dv_bo *cs_assembly_bo = pipeline->shared_data->assembly_bo; 3700bf215546Sopenharmony_ci 3701bf215546Sopenharmony_ci submit->cfg[5] = cs_assembly_bo->offset + cs_variant->assembly_offset; 3702bf215546Sopenharmony_ci submit->cfg[5] |= V3D_CSD_CFG5_PROPAGATE_NANS; 3703bf215546Sopenharmony_ci if (cs_variant->prog_data.base->single_seg) 3704bf215546Sopenharmony_ci submit->cfg[5] |= V3D_CSD_CFG5_SINGLE_SEG; 3705bf215546Sopenharmony_ci if (cs_variant->prog_data.base->threads == 4) 3706bf215546Sopenharmony_ci submit->cfg[5] |= V3D_CSD_CFG5_THREADING; 3707bf215546Sopenharmony_ci 3708bf215546Sopenharmony_ci if (cs_variant->prog_data.cs->shared_size > 0) { 3709bf215546Sopenharmony_ci job->csd.shared_memory = 3710bf215546Sopenharmony_ci v3dv_bo_alloc(cmd_buffer->device, 3711bf215546Sopenharmony_ci cs_variant->prog_data.cs->shared_size * wgs_per_sg, 3712bf215546Sopenharmony_ci "shared_vars", true); 3713bf215546Sopenharmony_ci if (!job->csd.shared_memory) { 3714bf215546Sopenharmony_ci v3dv_flag_oom(cmd_buffer, NULL); 3715bf215546Sopenharmony_ci return job; 3716bf215546Sopenharmony_ci } 3717bf215546Sopenharmony_ci } 3718bf215546Sopenharmony_ci 3719bf215546Sopenharmony_ci v3dv_job_add_bo_unchecked(job, cs_assembly_bo); 3720bf215546Sopenharmony_ci struct v3dv_cl_reloc uniforms = 3721bf215546Sopenharmony_ci v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, 3722bf215546Sopenharmony_ci cs_variant, 3723bf215546Sopenharmony_ci wg_uniform_offsets_out); 3724bf215546Sopenharmony_ci submit->cfg[6] = uniforms.bo->offset + uniforms.offset; 3725bf215546Sopenharmony_ci 3726bf215546Sopenharmony_ci 3727bf215546Sopenharmony_ci /* Track VK_KHR_buffer_device_address usage in the job */ 3728bf215546Sopenharmony_ci job->uses_buffer_device_address |= pipeline->uses_buffer_device_address; 3729bf215546Sopenharmony_ci 3730bf215546Sopenharmony_ci v3dv_job_add_bo(job, uniforms.bo); 3731bf215546Sopenharmony_ci 3732bf215546Sopenharmony_ci return job; 3733bf215546Sopenharmony_ci} 3734bf215546Sopenharmony_ci 3735bf215546Sopenharmony_cistatic void 3736bf215546Sopenharmony_cicmd_buffer_dispatch(struct v3dv_cmd_buffer *cmd_buffer, 3737bf215546Sopenharmony_ci uint32_t base_offset_x, 3738bf215546Sopenharmony_ci uint32_t base_offset_y, 3739bf215546Sopenharmony_ci uint32_t base_offset_z, 3740bf215546Sopenharmony_ci uint32_t group_count_x, 3741bf215546Sopenharmony_ci uint32_t group_count_y, 3742bf215546Sopenharmony_ci uint32_t group_count_z) 3743bf215546Sopenharmony_ci{ 3744bf215546Sopenharmony_ci if (group_count_x == 0 || group_count_y == 0 || group_count_z == 0) 3745bf215546Sopenharmony_ci return; 3746bf215546Sopenharmony_ci 3747bf215546Sopenharmony_ci struct v3dv_job *job = 3748bf215546Sopenharmony_ci cmd_buffer_create_csd_job(cmd_buffer, 3749bf215546Sopenharmony_ci base_offset_x, 3750bf215546Sopenharmony_ci base_offset_y, 3751bf215546Sopenharmony_ci base_offset_z, 3752bf215546Sopenharmony_ci group_count_x, 3753bf215546Sopenharmony_ci group_count_y, 3754bf215546Sopenharmony_ci group_count_z, 3755bf215546Sopenharmony_ci NULL, NULL); 3756bf215546Sopenharmony_ci 3757bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3758bf215546Sopenharmony_ci cmd_buffer->state.job = NULL; 3759bf215546Sopenharmony_ci} 3760bf215546Sopenharmony_ci 3761bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3762bf215546Sopenharmony_civ3dv_CmdDispatch(VkCommandBuffer commandBuffer, 3763bf215546Sopenharmony_ci uint32_t groupCountX, 3764bf215546Sopenharmony_ci uint32_t groupCountY, 3765bf215546Sopenharmony_ci uint32_t groupCountZ) 3766bf215546Sopenharmony_ci{ 3767bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3768bf215546Sopenharmony_ci 3769bf215546Sopenharmony_ci cmd_buffer_emit_pre_dispatch(cmd_buffer); 3770bf215546Sopenharmony_ci cmd_buffer_dispatch(cmd_buffer, 0, 0, 0, 3771bf215546Sopenharmony_ci groupCountX, groupCountY, groupCountZ); 3772bf215546Sopenharmony_ci} 3773bf215546Sopenharmony_ci 3774bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3775bf215546Sopenharmony_civ3dv_CmdDispatchBase(VkCommandBuffer commandBuffer, 3776bf215546Sopenharmony_ci uint32_t baseGroupX, 3777bf215546Sopenharmony_ci uint32_t baseGroupY, 3778bf215546Sopenharmony_ci uint32_t baseGroupZ, 3779bf215546Sopenharmony_ci uint32_t groupCountX, 3780bf215546Sopenharmony_ci uint32_t groupCountY, 3781bf215546Sopenharmony_ci uint32_t groupCountZ) 3782bf215546Sopenharmony_ci{ 3783bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3784bf215546Sopenharmony_ci 3785bf215546Sopenharmony_ci cmd_buffer_emit_pre_dispatch(cmd_buffer); 3786bf215546Sopenharmony_ci cmd_buffer_dispatch(cmd_buffer, 3787bf215546Sopenharmony_ci baseGroupX, baseGroupY, baseGroupZ, 3788bf215546Sopenharmony_ci groupCountX, groupCountY, groupCountZ); 3789bf215546Sopenharmony_ci} 3790bf215546Sopenharmony_ci 3791bf215546Sopenharmony_ci 3792bf215546Sopenharmony_cistatic void 3793bf215546Sopenharmony_cicmd_buffer_dispatch_indirect(struct v3dv_cmd_buffer *cmd_buffer, 3794bf215546Sopenharmony_ci struct v3dv_buffer *buffer, 3795bf215546Sopenharmony_ci uint32_t offset) 3796bf215546Sopenharmony_ci{ 3797bf215546Sopenharmony_ci /* We can't do indirect dispatches, so instead we record a CPU job that, 3798bf215546Sopenharmony_ci * when executed in the queue, will map the indirect buffer, read the 3799bf215546Sopenharmony_ci * dispatch parameters, and submit a regular dispatch. 3800bf215546Sopenharmony_ci */ 3801bf215546Sopenharmony_ci struct v3dv_job *job = 3802bf215546Sopenharmony_ci v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device, 3803bf215546Sopenharmony_ci V3DV_JOB_TYPE_CPU_CSD_INDIRECT, 3804bf215546Sopenharmony_ci cmd_buffer, -1); 3805bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3806bf215546Sopenharmony_ci 3807bf215546Sopenharmony_ci /* We need to create a CSD job now, even if we still don't know the actual 3808bf215546Sopenharmony_ci * dispatch parameters, because the job setup needs to be done using the 3809bf215546Sopenharmony_ci * current command buffer state (i.e. pipeline, descriptor sets, push 3810bf215546Sopenharmony_ci * constants, etc.). So we create the job with default dispatch parameters 3811bf215546Sopenharmony_ci * and we will rewrite the parts we need at submit time if the indirect 3812bf215546Sopenharmony_ci * parameters don't match the ones we used to setup the job. 3813bf215546Sopenharmony_ci */ 3814bf215546Sopenharmony_ci struct v3dv_job *csd_job = 3815bf215546Sopenharmony_ci cmd_buffer_create_csd_job(cmd_buffer, 3816bf215546Sopenharmony_ci 0, 0, 0, 3817bf215546Sopenharmony_ci 1, 1, 1, 3818bf215546Sopenharmony_ci &job->cpu.csd_indirect.wg_uniform_offsets[0], 3819bf215546Sopenharmony_ci &job->cpu.csd_indirect.wg_size); 3820bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 3821bf215546Sopenharmony_ci assert(csd_job); 3822bf215546Sopenharmony_ci 3823bf215546Sopenharmony_ci job->cpu.csd_indirect.buffer = buffer; 3824bf215546Sopenharmony_ci job->cpu.csd_indirect.offset = offset; 3825bf215546Sopenharmony_ci job->cpu.csd_indirect.csd_job = csd_job; 3826bf215546Sopenharmony_ci 3827bf215546Sopenharmony_ci /* If the compute shader reads the workgroup sizes we will also need to 3828bf215546Sopenharmony_ci * rewrite the corresponding uniforms. 3829bf215546Sopenharmony_ci */ 3830bf215546Sopenharmony_ci job->cpu.csd_indirect.needs_wg_uniform_rewrite = 3831bf215546Sopenharmony_ci job->cpu.csd_indirect.wg_uniform_offsets[0] || 3832bf215546Sopenharmony_ci job->cpu.csd_indirect.wg_uniform_offsets[1] || 3833bf215546Sopenharmony_ci job->cpu.csd_indirect.wg_uniform_offsets[2]; 3834bf215546Sopenharmony_ci 3835bf215546Sopenharmony_ci list_addtail(&job->list_link, &cmd_buffer->jobs); 3836bf215546Sopenharmony_ci list_addtail(&csd_job->list_link, &cmd_buffer->jobs); 3837bf215546Sopenharmony_ci cmd_buffer->state.job = NULL; 3838bf215546Sopenharmony_ci} 3839bf215546Sopenharmony_ci 3840bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3841bf215546Sopenharmony_civ3dv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, 3842bf215546Sopenharmony_ci VkBuffer _buffer, 3843bf215546Sopenharmony_ci VkDeviceSize offset) 3844bf215546Sopenharmony_ci{ 3845bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer); 3846bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer); 3847bf215546Sopenharmony_ci 3848bf215546Sopenharmony_ci assert(offset <= UINT32_MAX); 3849bf215546Sopenharmony_ci 3850bf215546Sopenharmony_ci cmd_buffer_emit_pre_dispatch(cmd_buffer); 3851bf215546Sopenharmony_ci cmd_buffer_dispatch_indirect(cmd_buffer, buffer, offset); 3852bf215546Sopenharmony_ci} 3853bf215546Sopenharmony_ci 3854bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 3855bf215546Sopenharmony_civ3dv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask) 3856bf215546Sopenharmony_ci{ 3857bf215546Sopenharmony_ci /* Nothing to do here since we only support a single device */ 3858bf215546Sopenharmony_ci assert(deviceMask == 0x1); 3859bf215546Sopenharmony_ci} 3860