/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_cmd_buffer.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "genxml/gen_macros.h"

#include "panvk_cs.h"
#include "panvk_private.h"

#include "pan_blitter.h"
#include "pan_cs.h"
#include "pan_encoder.h"

#include "util/rounding.h"
#include "util/u_pack_color.h"
#include "vk_format.h"

static uint32_t
panvk_debug_adjust_bo_flags(const struct panvk_device *device,
                            uint32_t bo_flags)
{
   uint32_t debug_flags =
      device->physical_device->instance->debug_flags;

   if (debug_flags & PANVK_DEBUG_DUMP)
      bo_flags &= ~PAN_BO_INVISIBLE;

   return bo_flags;
}

static void
panvk_cmd_prepare_fragment_job(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr job_ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, FRAGMENT_JOB);

   GENX(pan_emit_fragment_job)(fbinfo, batch->fb.desc.gpu, job_ptr.cpu);
   batch->fragment_job = job_ptr.gpu;
   util_dynarray_append(&batch->jobs, void *, job_ptr.cpu);
}

void
panvk_per_arch(cmd_close_batch)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (!batch)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   bool clear = fbinfo->zs.clear.z | fbinfo->zs.clear.s;
   for (unsigned i = 0; i < fbinfo->rt_count; i++)
      clear |= fbinfo->rts[i].clear;

   if (!clear && !batch->scoreboard.first_job) {
      if (util_dynarray_num_elements(&batch->event_ops, struct panvk_event_op) == 0) {
         /* Content-less batch, let's drop it */
         vk_free(&cmdbuf->pool->vk.alloc, batch);
      } else {
         /* Batch has no jobs but is needed for synchronization, let's add a
          * NULL job so the SUBMIT ioctl doesn't choke on it.
          */
         struct panfrost_ptr ptr = pan_pool_alloc_desc(&cmdbuf->desc_pool.base,
                                                       JOB_HEADER);
         util_dynarray_append(&batch->jobs, void *, ptr.cpu);
         panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                          MALI_JOB_TYPE_NULL, false, false, 0, 0,
                          &ptr, false);
         list_addtail(&batch->node, &cmdbuf->batches);
      }
      cmdbuf->state.batch = NULL;
      return;
   }

   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;

   list_addtail(&batch->node, &cmdbuf->batches);

   if (batch->scoreboard.first_tiler) {
      struct panfrost_ptr preload_jobs[2];
      unsigned num_preload_jobs =
         GENX(pan_preload_fb)(&cmdbuf->desc_pool.base, &batch->scoreboard,
                              &cmdbuf->state.fb.info, batch->tls.gpu,
                              batch->tiler.descs.gpu, preload_jobs);
      for (unsigned i = 0; i < num_preload_jobs; i++)
         util_dynarray_append(&batch->jobs, void *, preload_jobs[i].cpu);
   }

   if (batch->tlsinfo.tls.size) {
      unsigned size = panfrost_get_total_stack_size(batch->tlsinfo.tls.size,
                                                    pdev->thread_tls_alloc,
                                                    pdev->core_id_range);
      batch->tlsinfo.tls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, size, 4096).gpu;
   }

   if (batch->tlsinfo.wls.size) {
      assert(batch->wls_total_size);
      batch->tlsinfo.wls.ptr =
         pan_pool_alloc_aligned(&cmdbuf->tls_pool.base, batch->wls_total_size, 4096).gpu;
   }

   if (batch->tls.cpu)
      GENX(pan_emit_tls)(&batch->tlsinfo, batch->tls.cpu);

   if (batch->fb.desc.cpu) {
      batch->fb.desc.gpu |=
         GENX(pan_emit_fbd)(pdev, &cmdbuf->state.fb.info, &batch->tlsinfo,
                            &batch->tiler.ctx, batch->fb.desc.cpu);

      panvk_cmd_prepare_fragment_job(cmdbuf);
   }

   cmdbuf->state.batch = NULL;
}

void
panvk_per_arch(CmdNextSubpass2)(VkCommandBuffer commandBuffer,
                                const VkSubpassBeginInfo *pSubpassBeginInfo,
                                const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   cmdbuf->state.subpass++;
   panvk_cmd_fb_info_set_subpass(cmdbuf);
   panvk_cmd_open_batch(cmdbuf);
}

void
panvk_per_arch(CmdNextSubpass)(VkCommandBuffer cmd, VkSubpassContents contents)
{
   VkSubpassBeginInfo binfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
      .contents = contents
   };
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdNextSubpass2)(cmd, &binfo, &einfo);
}

void
panvk_per_arch(cmd_alloc_fb_desc)(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->fb.desc.gpu)
      return;

   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   bool has_zs_ext = fbinfo->zs.view.zs || fbinfo->zs.view.s;
   unsigned tags = MALI_FBD_TAG_IS_MFBD;

   batch->fb.info = cmdbuf->state.framebuffer;
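   /* The framebuffer descriptor is allocated as one aggregate: the
    * FRAMEBUFFER descriptor itself, an optional ZS/CRC extension when a
    * depth/stencil view is attached, and one RENDER_TARGET descriptor per
    * color attachment (at least one, even when rt_count is zero). The GPU
    * address is tagged with MALI_FBD_TAG_IS_MFBD so consumers know the MFBD
    * layout is used.
    */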
   batch->fb.desc =
      pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                    PAN_DESC(FRAMEBUFFER),
                                    PAN_DESC_ARRAY(has_zs_ext ? 1 : 0, ZS_CRC_EXTENSION),
                                    PAN_DESC_ARRAY(MAX2(fbinfo->rt_count, 1), RENDER_TARGET));

   /* Tag the pointer */
   batch->fb.desc.gpu |= tags;

   memset(&cmdbuf->state.fb.info.bifrost.pre_post.dcds, 0,
          sizeof(cmdbuf->state.fb.info.bifrost.pre_post.dcds));
}

void
panvk_per_arch(cmd_alloc_tls_desc)(struct panvk_cmd_buffer *cmdbuf, bool gfx)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   assert(batch);
   if (!batch->tls.gpu) {
      batch->tls =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   }
}

static void
panvk_cmd_prepare_draw_sysvals(struct panvk_cmd_buffer *cmdbuf,
                               struct panvk_cmd_bind_point_state *bind_point_state,
                               struct panvk_draw_info *draw)
{
   struct panvk_sysvals *sysvals = &bind_point_state->desc_state.sysvals;

   unsigned base_vertex = draw->index_size ? draw->vertex_offset : 0;
   if (sysvals->first_vertex != draw->offset_start ||
       sysvals->base_vertex != base_vertex ||
       sysvals->base_instance != draw->first_instance) {
      sysvals->first_vertex = draw->offset_start;
      sysvals->base_vertex = base_vertex;
      sysvals->base_instance = draw->first_instance;
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_BLEND_CONSTANTS) {
      memcpy(&sysvals->blend_constants, cmdbuf->state.blend.constants,
             sizeof(cmdbuf->state.blend.constants));
      bind_point_state->desc_state.sysvals_ptr = 0;
   }

   if (cmdbuf->state.dirty & PANVK_DYNAMIC_VIEWPORT) {
      panvk_sysval_upload_viewport_scale(&cmdbuf->state.viewport,
                                         &sysvals->viewport_scale);
      panvk_sysval_upload_viewport_offset(&cmdbuf->state.viewport,
                                          &sysvals->viewport_offset);
      bind_point_state->desc_state.sysvals_ptr = 0;
   }
}

static void
panvk_cmd_prepare_sysvals(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;

   if (desc_state->sysvals_ptr)
      return;

   struct panfrost_ptr sysvals =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             sizeof(desc_state->sysvals), 16);
   memcpy(sysvals.cpu, &desc_state->sysvals, sizeof(desc_state->sysvals));
   desc_state->sysvals_ptr = sysvals.gpu;
}

static void
panvk_cmd_prepare_push_constants(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->layout->push_constants.size || desc_state->push_constants)
      return;

   struct panfrost_ptr push_constants =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             ALIGN_POT(pipeline->layout->push_constants.size, 16),
                             16);

   memcpy(push_constants.cpu, cmdbuf->push_constants,
          pipeline->layout->push_constants.size);
   desc_state->push_constants = push_constants.gpu;
}

static void
panvk_cmd_prepare_ubos(struct panvk_cmd_buffer *cmdbuf,
                       struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (!pipeline->num_ubos || desc_state->ubos)
      return;

   panvk_cmd_prepare_sysvals(cmdbuf, bind_point_state);
   panvk_cmd_prepare_push_constants(cmdbuf, bind_point_state);
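   /* Sysvals and push constants are exposed to shaders through UBO entries,
    * so their GPU copies have to exist before the UBO descriptor array below
    * is emitted.
    */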
   struct panfrost_ptr ubos =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                pipeline->num_ubos,
                                UNIFORM_BUFFER);

   panvk_per_arch(emit_ubos)(pipeline, desc_state, ubos.cpu);

   desc_state->ubos = ubos.gpu;
}

static void
panvk_cmd_prepare_textures(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_textures = pipeline->layout->num_textures;

   if (!num_textures || desc_state->textures)
      return;

   struct panfrost_ptr textures =
      pan_pool_alloc_aligned(&cmdbuf->desc_pool.base,
                             num_textures * pan_size(TEXTURE),
                             pan_size(TEXTURE));

   void *texture = textures.cpu;

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i]) continue;

      memcpy(texture,
             desc_state->sets[i]->textures,
             desc_state->sets[i]->layout->num_textures *
             pan_size(TEXTURE));

      texture += desc_state->sets[i]->layout->num_textures *
                 pan_size(TEXTURE);
   }

   desc_state->textures = textures.gpu;
}

static void
panvk_cmd_prepare_samplers(struct panvk_cmd_buffer *cmdbuf,
                           struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_samplers = pipeline->layout->num_samplers;

   if (!num_samplers || desc_state->samplers)
      return;

   struct panfrost_ptr samplers =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                num_samplers,
                                SAMPLER);

   void *sampler = samplers.cpu;

   /* Prepare the dummy sampler */
   pan_pack(sampler, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.magnify_nearest = true;
      cfg.minify_nearest = true;
      cfg.normalized_coordinates = false;
   }

   sampler += pan_size(SAMPLER);

   for (unsigned i = 0; i < ARRAY_SIZE(desc_state->sets); i++) {
      if (!desc_state->sets[i]) continue;

      memcpy(sampler,
             desc_state->sets[i]->samplers,
             desc_state->sets[i]->layout->num_samplers *
             pan_size(SAMPLER));

      sampler += desc_state->sets[i]->layout->num_samplers *
                 pan_size(SAMPLER);
   }

   desc_state->samplers = samplers.gpu;
}

static void
panvk_draw_prepare_fs_rsd(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (!pipeline->fs.dynamic_rsd) {
      draw->fs_rsd = pipeline->rsds[MESA_SHADER_FRAGMENT];
      return;
   }

   if (!cmdbuf->state.fs_rsd) {
      struct panfrost_ptr rsd =
         pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                       PAN_DESC(RENDERER_STATE),
                                       PAN_DESC_ARRAY(pipeline->blend.state.rt_count,
                                                      BLEND));

      struct mali_renderer_state_packed rsd_dyn;
      struct mali_renderer_state_packed *rsd_templ =
         (struct mali_renderer_state_packed *)&pipeline->fs.rsd_template;

      STATIC_ASSERT(sizeof(pipeline->fs.rsd_template) >= sizeof(*rsd_templ));

      panvk_per_arch(emit_dyn_fs_rsd)(pipeline, &cmdbuf->state, &rsd_dyn);
      pan_merge(rsd_dyn, (*rsd_templ), RENDERER_STATE);
      memcpy(rsd.cpu, &rsd_dyn, sizeof(rsd_dyn));

      void *bd = rsd.cpu + pan_size(RENDERER_STATE);
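      /* Blend descriptors that reference a blend constant cannot be baked at
       * pipeline creation time: they are re-packed here with the current
       * dynamic blend constants merged over the pipeline's template.
       */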
      for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
         if (pipeline->blend.constant[i].index != (uint8_t)~0) {
            struct mali_blend_packed bd_dyn;
            struct mali_blend_packed *bd_templ =
               (struct mali_blend_packed *)&pipeline->blend.bd_template[i];

            STATIC_ASSERT(sizeof(pipeline->blend.bd_template[0]) >= sizeof(*bd_templ));
            panvk_per_arch(emit_blend_constant)(cmdbuf->device, pipeline, i,
                                                cmdbuf->state.blend.constants,
                                                &bd_dyn);
            pan_merge(bd_dyn, (*bd_templ), BLEND);
            memcpy(bd, &bd_dyn, sizeof(bd_dyn));
         }
         bd += pan_size(BLEND);
      }

      cmdbuf->state.fs_rsd = rsd.gpu;
   }

   draw->fs_rsd = cmdbuf->state.fs_rsd;
}

void
panvk_per_arch(cmd_get_tiler_context)(struct panvk_cmd_buffer *cmdbuf,
                                      unsigned width, unsigned height)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   if (batch->tiler.descs.cpu)
      return;

   batch->tiler.descs =
      pan_pool_alloc_desc_aggregate(&cmdbuf->desc_pool.base,
                                    PAN_DESC(TILER_CONTEXT),
                                    PAN_DESC(TILER_HEAP));
   STATIC_ASSERT(sizeof(batch->tiler.templ) >=
                 pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));

   struct panfrost_ptr desc = {
      .gpu = batch->tiler.descs.gpu,
      .cpu = batch->tiler.templ,
   };

   panvk_per_arch(emit_tiler_context)(cmdbuf->device, width, height, &desc);
   memcpy(batch->tiler.descs.cpu, batch->tiler.templ,
          pan_size(TILER_CONTEXT) + pan_size(TILER_HEAP));
   batch->tiler.ctx.bifrost = batch->tiler.descs.gpu;
}

void
panvk_per_arch(cmd_prepare_tiler_context)(struct panvk_cmd_buffer *cmdbuf)
{
   const struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;

   panvk_per_arch(cmd_get_tiler_context)(cmdbuf,
                                         fbinfo->width,
                                         fbinfo->height);
}

static void
panvk_draw_prepare_tiler_context(struct panvk_cmd_buffer *cmdbuf,
                                 struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;

   panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
   draw->tiler_ctx = &batch->tiler.ctx;
}

static void
panvk_draw_prepare_varyings(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_varyings_info *varyings = &cmdbuf->state.varyings;

   panvk_varyings_alloc(varyings, &cmdbuf->varying_pool.base,
                        draw->padded_vertex_count * draw->instance_count);

   unsigned buf_count = panvk_varyings_buf_count(varyings);
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                buf_count + 1,
                                ATTRIBUTE_BUFFER);

   panvk_per_arch(emit_varying_bufs)(varyings, bufs.cpu);

   /* We need an empty entry to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   if (BITSET_TEST(varyings->active, VARYING_SLOT_POS)) {
      draw->position = varyings->buf[varyings->varying[VARYING_SLOT_POS].buf].address +
                       varyings->varying[VARYING_SLOT_POS].offset;
   }
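   /* Primitive size: when the vertex shader writes gl_PointSize we pass the
    * point-size varying to the tiler job; otherwise, for line topologies, the
    * constant line width (dynamic state or pipeline value) is used instead.
    */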
   if (pipeline->ia.writes_point_size) {
      draw->psiz = varyings->buf[varyings->varying[VARYING_SLOT_PSIZ].buf].address +
                   varyings->varying[VARYING_SLOT_PSIZ].offset;
   } else if (pipeline->ia.topology == MALI_DRAW_MODE_LINES ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_STRIP ||
              pipeline->ia.topology == MALI_DRAW_MODE_LINE_LOOP) {
      draw->line_width = pipeline->dynamic_state_mask & PANVK_DYNAMIC_LINE_WIDTH ?
                         cmdbuf->state.rast.line_width : pipeline->rast.line_width;
   } else {
      draw->line_width = 1.0f;
   }
   draw->varying_bufs = bufs.gpu;

   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!varyings->stage[s].count) continue;

      struct panfrost_ptr attribs =
         pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                   varyings->stage[s].count,
                                   ATTRIBUTE);

      panvk_per_arch(emit_varyings)(cmdbuf->device, varyings, s, attribs.cpu);
      draw->stages[s].varyings = attribs.gpu;
   }
}

static void
panvk_fill_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                          struct panvk_cmd_bind_point_state *bind_point_state,
                          void *attrib_bufs, void *attribs,
                          unsigned first_buf)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned s = 0; s < pipeline->layout->num_sets; s++) {
      const struct panvk_descriptor_set *set = desc_state->sets[s];

      if (!set) continue;

      const struct panvk_descriptor_set_layout *layout = set->layout;
      unsigned img_idx = pipeline->layout->sets[s].img_offset;
      unsigned offset = img_idx * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned size = layout->num_imgs * pan_size(ATTRIBUTE_BUFFER) * 2;

      memcpy(attrib_bufs + offset, desc_state->sets[s]->img_attrib_bufs, size);

      offset = img_idx * pan_size(ATTRIBUTE);
      for (unsigned i = 0; i < layout->num_imgs; i++) {
         pan_pack(attribs + offset, ATTRIBUTE, cfg) {
            cfg.buffer_index = first_buf + (img_idx + i) * 2;
            cfg.format = desc_state->sets[s]->img_fmts[i];
         }
         offset += pan_size(ATTRIBUTE);
      }
   }
}

static void
panvk_prepare_non_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_cmd_bind_point_state *bind_point_state)
{
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   if (desc_state->non_vs_attribs || !pipeline->img_access_mask)
      return;

   unsigned attrib_count = pipeline->layout->num_imgs;
   unsigned attrib_buf_count = (pipeline->layout->num_imgs * 2);
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                attrib_buf_count + 1,
                                ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
                                ATTRIBUTE);

   panvk_fill_non_vs_attribs(cmdbuf, bind_point_state, bufs.cpu, attribs.cpu, 0);

   desc_state->non_vs_attrib_bufs = bufs.gpu;
   desc_state->non_vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_vs_attribs(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   unsigned num_imgs =
      pipeline->img_access_mask & BITFIELD_BIT(MESA_SHADER_VERTEX) ?
      pipeline->layout->num_imgs : 0;
   unsigned attrib_count = pipeline->attribs.attrib_count + num_imgs;

   if (desc_state->vs_attribs || !attrib_count)
      return;

   if (!pipeline->attribs.buf_count) {
      panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
      desc_state->vs_attrib_bufs = desc_state->non_vs_attrib_bufs;
      desc_state->vs_attribs = desc_state->non_vs_attribs;
      return;
   }

   unsigned attrib_buf_count = pipeline->attribs.buf_count * 2;
   struct panfrost_ptr bufs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base,
                                attrib_buf_count + 1,
                                ATTRIBUTE_BUFFER);
   struct panfrost_ptr attribs =
      pan_pool_alloc_desc_array(&cmdbuf->desc_pool.base, attrib_count,
                                ATTRIBUTE);

   panvk_per_arch(emit_attrib_bufs)(&pipeline->attribs,
                                    cmdbuf->state.vb.bufs,
                                    cmdbuf->state.vb.count,
                                    draw, bufs.cpu);
   panvk_per_arch(emit_attribs)(cmdbuf->device, draw, &pipeline->attribs,
                                cmdbuf->state.vb.bufs, cmdbuf->state.vb.count,
                                attribs.cpu);

   if (attrib_count > pipeline->attribs.buf_count) {
      unsigned bufs_offset = pipeline->attribs.buf_count * pan_size(ATTRIBUTE_BUFFER) * 2;
      unsigned attribs_offset = pipeline->attribs.buf_count * pan_size(ATTRIBUTE);

      panvk_fill_non_vs_attribs(cmdbuf, bind_point_state,
                                bufs.cpu + bufs_offset, attribs.cpu + attribs_offset,
                                pipeline->attribs.buf_count * 2);
   }

   /* A NULL entry is needed to stop prefetching on Bifrost */
   memset(bufs.cpu + (pan_size(ATTRIBUTE_BUFFER) * attrib_buf_count), 0,
          pan_size(ATTRIBUTE_BUFFER));

   desc_state->vs_attrib_bufs = bufs.gpu;
   desc_state->vs_attribs = attribs.gpu;
}

static void
panvk_draw_prepare_attributes(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;

   for (unsigned i = 0; i < ARRAY_SIZE(draw->stages); i++) {
      if (i == MESA_SHADER_VERTEX) {
         panvk_draw_prepare_vs_attribs(cmdbuf, draw);
         draw->stages[i].attributes = desc_state->vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->vs_attrib_bufs;
      } else if (pipeline->img_access_mask & BITFIELD_BIT(i)) {
         panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
         draw->stages[i].attributes = desc_state->non_vs_attribs;
         draw->stages[i].attribute_bufs = desc_state->non_vs_attrib_bufs;
      }
   }
}

static void
panvk_draw_prepare_viewport(struct panvk_cmd_buffer *cmdbuf,
                            struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   if (pipeline->vpd) {
      draw->viewport = pipeline->vpd;
   } else if (cmdbuf->state.vpd) {
      draw->viewport = cmdbuf->state.vpd;
   } else {
      struct panfrost_ptr vp =
         pan_pool_alloc_desc(&cmdbuf->desc_pool.base, VIEWPORT);

      const VkViewport *viewport =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_VIEWPORT ?
         &cmdbuf->state.viewport : &pipeline->viewport;
      const VkRect2D *scissor =
         pipeline->dynamic_state_mask & PANVK_DYNAMIC_SCISSOR ?
         &cmdbuf->state.scissor : &pipeline->scissor;

      panvk_per_arch(emit_viewport)(viewport, scissor, vp.cpu);
      draw->viewport = cmdbuf->state.vpd = vp.gpu;
   }
}

static void
panvk_draw_prepare_vertex_job(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.vertex = ptr;
   panvk_per_arch(emit_vertex_job)(pipeline, draw, ptr.cpu);
}

static void
panvk_draw_prepare_tiler_job(struct panvk_cmd_buffer *cmdbuf,
                             struct panvk_draw_info *draw)
{
   const struct panvk_pipeline *pipeline = panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panfrost_ptr ptr =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, TILER_JOB);

   util_dynarray_append(&batch->jobs, void *, ptr.cpu);
   draw->jobs.tiler = ptr;
   panvk_per_arch(emit_tiler_job)(pipeline, draw, ptr.cpu);
}

static void
panvk_cmd_draw(struct panvk_cmd_buffer *cmdbuf,
               struct panvk_draw_info *draw)
{
   struct panvk_batch *batch = cmdbuf->state.batch;
   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, GRAPHICS);
   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);

   /* The job ID field in the descriptor is only 16 bits wide, so make sure
    * all three jobs (two on Bifrost) emitted for this draw land in the same
    * batch.
    */
   if (batch->scoreboard.job_index >= (UINT16_MAX - 3)) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      batch = panvk_cmd_open_batch(cmdbuf);
   }

   if (pipeline->rast.enable)
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);

   panvk_cmd_prepare_draw_sysvals(cmdbuf, bind_point_state, draw);
   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);

   /* TODO: indexed draws */
   struct panvk_descriptor_state *desc_state =
      panvk_cmd_get_desc_state(cmdbuf, GRAPHICS);

   draw->tls = batch->tls.gpu;
   draw->fb = batch->fb.desc.gpu;
   draw->ubos = desc_state->ubos;
   draw->textures = desc_state->textures;
   draw->samplers = desc_state->samplers;

   STATIC_ASSERT(sizeof(draw->invocation) >= sizeof(struct mali_invocation_packed));
   panfrost_pack_work_groups_compute((struct mali_invocation_packed *)&draw->invocation,
                                     1, draw->vertex_range, draw->instance_count,
                                     1, 1, 1, true, false);

   panvk_draw_prepare_fs_rsd(cmdbuf, draw);
   panvk_draw_prepare_varyings(cmdbuf, draw);
   panvk_draw_prepare_attributes(cmdbuf, draw);
   panvk_draw_prepare_viewport(cmdbuf, draw);
   panvk_draw_prepare_tiler_context(cmdbuf, draw);
   panvk_draw_prepare_vertex_job(cmdbuf, draw);
   panvk_draw_prepare_tiler_job(cmdbuf, draw);
   batch->tlsinfo.tls.size = MAX2(pipeline->tls_size, batch->tlsinfo.tls.size);
   assert(!pipeline->wls_size);

   unsigned vjob_id =
      panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                       MALI_JOB_TYPE_VERTEX, false, false, 0, 0,
                       &draw->jobs.vertex, false);
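   /* The tiler job only runs when rasterization is enabled, and it depends on
    * the vertex job (vjob_id) so positions and varyings are written before
    * tiling starts.
    */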
   if (pipeline->rast.enable) {
      panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                       MALI_JOB_TYPE_TILER, false, false, vjob_id, 0,
                       &draw->jobs.tiler, false);
   }

   /* Clear the dirty flags all at once */
   desc_state->dirty = cmdbuf->state.dirty = 0;
}

void
panvk_per_arch(CmdDraw)(VkCommandBuffer commandBuffer,
                        uint32_t vertexCount,
                        uint32_t instanceCount,
                        uint32_t firstVertex,
                        uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   if (instanceCount == 0 || vertexCount == 0)
      return;

   struct panvk_draw_info draw = {
      .first_vertex = firstVertex,
      .vertex_count = vertexCount,
      .vertex_range = vertexCount,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .padded_vertex_count = instanceCount > 1 ?
                             panfrost_padded_vertex_count(vertexCount) :
                             vertexCount,
      .offset_start = firstVertex,
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

static void
panvk_index_minmax_search(struct panvk_cmd_buffer *cmdbuf,
                          uint32_t start, uint32_t count,
                          bool restart,
                          uint32_t *min, uint32_t *max)
{
   assert(cmdbuf->state.ib.buffer);
   assert(cmdbuf->state.ib.buffer->bo);
   assert(cmdbuf->state.ib.buffer->bo->ptr.cpu);

   void *ptr = cmdbuf->state.ib.buffer->bo->ptr.cpu +
               cmdbuf->state.ib.buffer->bo_offset +
               cmdbuf->state.ib.offset;

   fprintf(stderr, "WARNING: Crawling index buffers from the CPU isn't valid in Vulkan\n");

   *max = 0;

   /* TODO: Use panfrost_minmax_cache */
   /* TODO: Read a full cacheline of data at a time to mitigate the slowness
    * of the uncached mapping.
    */
   switch (cmdbuf->state.ib.index_size) {
#define MINMAX_SEARCH_CASE(sz) \
   case sz: { \
      uint ## sz ## _t *indices = ptr; \
      *min = UINT ## sz ## _MAX; \
      for (uint32_t i = 0; i < count; i++) { \
         if (restart && indices[i + start] == UINT ## sz ## _MAX) continue; \
         *min = MIN2(indices[i + start], *min); \
         *max = MAX2(indices[i + start], *max); \
      } \
      break; \
   }
   MINMAX_SEARCH_CASE(32)
   MINMAX_SEARCH_CASE(16)
   MINMAX_SEARCH_CASE(8)
#undef MINMAX_SEARCH_CASE
   default:
      unreachable("Invalid index size");
   }
}

void
panvk_per_arch(CmdDrawIndexed)(VkCommandBuffer commandBuffer,
                               uint32_t indexCount,
                               uint32_t instanceCount,
                               uint32_t firstIndex,
                               int32_t vertexOffset,
                               uint32_t firstInstance)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   uint32_t min_vertex, max_vertex;

   if (instanceCount == 0 || indexCount == 0)
      return;

   const struct panvk_pipeline *pipeline =
      panvk_cmd_get_pipeline(cmdbuf, GRAPHICS);
   bool primitive_restart = pipeline->ia.primitive_restart;

   panvk_index_minmax_search(cmdbuf, firstIndex, indexCount, primitive_restart,
                             &min_vertex, &max_vertex);

   unsigned vertex_range = max_vertex - min_vertex + 1;
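   /* offset_start and vertex_range describe the window of vertices actually
    * referenced by this index range: the vertex job only runs vertex_range
    * invocations, starting at offset_start.
    */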
   struct panvk_draw_info draw = {
      .index_size = cmdbuf->state.ib.index_size,
      .first_index = firstIndex,
      .index_count = indexCount,
      .vertex_offset = vertexOffset,
      .first_instance = firstInstance,
      .instance_count = instanceCount,
      .vertex_range = vertex_range,
      .vertex_count = indexCount + abs(vertexOffset),
      .padded_vertex_count = instanceCount > 1 ?
                             panfrost_padded_vertex_count(vertex_range) :
                             vertex_range,
      .offset_start = min_vertex + vertexOffset,
      .indices = panvk_buffer_gpu_ptr(cmdbuf->state.ib.buffer,
                                      cmdbuf->state.ib.offset) +
                 (firstIndex * (cmdbuf->state.ib.index_size / 8)),
   };

   panvk_cmd_draw(cmdbuf, &draw);
}

VkResult
panvk_per_arch(EndCommandBuffer)(VkCommandBuffer commandBuffer)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VkResult ret =
      cmdbuf->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY ?
      cmdbuf->vk.cmd_queue.error : cmdbuf->record_result;

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   cmdbuf->status = ret == VK_SUCCESS ?
                    PANVK_CMD_BUFFER_STATUS_EXECUTABLE :
                    PANVK_CMD_BUFFER_STATUS_INVALID;
   return ret;
}

void
panvk_per_arch(CmdEndRenderPass2)(VkCommandBuffer commandBuffer,
                                  const VkSubpassEndInfo *pSubpassEndInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   vk_free(&cmdbuf->pool->vk.alloc, cmdbuf->state.clear);
   cmdbuf->state.batch = NULL;
   cmdbuf->state.pass = NULL;
   cmdbuf->state.subpass = NULL;
   cmdbuf->state.framebuffer = NULL;
   cmdbuf->state.clear = NULL;
}

void
panvk_per_arch(CmdEndRenderPass)(VkCommandBuffer cmd)
{
   VkSubpassEndInfo einfo = {
      .sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO,
   };

   panvk_per_arch(CmdEndRenderPass2)(cmd, &einfo);
}


void
panvk_per_arch(CmdPipelineBarrier2)(VkCommandBuffer commandBuffer,
                                    const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   /* Caches are flushed/invalidated at batch boundaries for now, so there is
    * nothing to do for memory barriers as long as barriers are implemented by
    * starting a new batch.
    * FIXME: We can probably do better with a CacheFlush job that has the
    * barrier flag set to true.
    */
   if (cmdbuf->state.batch) {
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

static void
panvk_add_set_event_operation(struct panvk_cmd_buffer *cmdbuf,
                              struct panvk_event *event,
                              enum panvk_event_op_type type)
{
   struct panvk_event_op op = {
      .type = type,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one so this operation happens in
       * the right order.
       */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   } else {
      /* Let's close the current batch so the operation executes before any
       * future commands.
       */
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
      panvk_cmd_preload_fb_after_batch_split(cmdbuf);
      panvk_cmd_open_batch(cmdbuf);
   }
}

static void
panvk_add_wait_event_operation(struct panvk_cmd_buffer *cmdbuf,
                               struct panvk_event *event)
{
   struct panvk_event_op op = {
      .type = PANVK_EVENT_OP_WAIT,
      .event = event,
   };

   if (cmdbuf->state.batch == NULL) {
      /* No open batch, let's create a new one and have it wait for this event.
       */
      panvk_cmd_open_batch(cmdbuf);
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
   } else {
      /* Let's close the current batch so any future commands wait on the
       * event signal operation.
       */
      if (cmdbuf->state.batch->fragment_job ||
          cmdbuf->state.batch->scoreboard.first_job) {
         panvk_per_arch(cmd_close_batch)(cmdbuf);
         panvk_cmd_preload_fb_after_batch_split(cmdbuf);
         panvk_cmd_open_batch(cmdbuf);
      }
      util_dynarray_append(&cmdbuf->state.batch->event_ops,
                           struct panvk_event_op,
                           op);
   }
}

void
panvk_per_arch(CmdSetEvent2)(VkCommandBuffer commandBuffer,
                             VkEvent _event,
                             const VkDependencyInfo *pDependencyInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdSetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_SET);
}

void
panvk_per_arch(CmdResetEvent2)(VkCommandBuffer commandBuffer,
                               VkEvent _event,
                               VkPipelineStageFlags2 stageMask)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_event, event, _event);

   /* vkCmdResetEvent cannot be called inside a render pass */
   assert(cmdbuf->state.pass == NULL);

   panvk_add_set_event_operation(cmdbuf, event, PANVK_EVENT_OP_RESET);
}

void
panvk_per_arch(CmdWaitEvents2)(VkCommandBuffer commandBuffer,
                               uint32_t eventCount,
                               const VkEvent *pEvents,
                               const VkDependencyInfo *pDependencyInfos)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   assert(eventCount > 0);

   for (uint32_t i = 0; i < eventCount; i++) {
      VK_FROM_HANDLE(panvk_event, event, pEvents[i]);
      panvk_add_wait_event_operation(cmdbuf, event);
   }
}

static VkResult
panvk_reset_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
{
   vk_command_buffer_reset(&cmdbuf->vk);

   cmdbuf->record_result = VK_SUCCESS;

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->pool->vk.alloc, batch);
   }

   panvk_pool_reset(&cmdbuf->desc_pool);
   panvk_pool_reset(&cmdbuf->tls_pool);
   panvk_pool_reset(&cmdbuf->varying_pool);
   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;

   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
      memset(&cmdbuf->bind_points[i].desc_state.sets, 0, sizeof(cmdbuf->bind_points[0].desc_state.sets));

   return cmdbuf->record_result;
}

static void
panvk_destroy_cmdbuf(struct panvk_cmd_buffer *cmdbuf)
{
   struct panvk_device *device = cmdbuf->device;

   list_del(&cmdbuf->pool_link);

   list_for_each_entry_safe(struct panvk_batch, batch, &cmdbuf->batches, node) {
      list_del(&batch->node);
      util_dynarray_fini(&batch->jobs);
      util_dynarray_fini(&batch->event_ops);

      vk_free(&cmdbuf->pool->vk.alloc, batch);
   }

   panvk_pool_cleanup(&cmdbuf->desc_pool);
   panvk_pool_cleanup(&cmdbuf->tls_pool);
   panvk_pool_cleanup(&cmdbuf->varying_pool);
   vk_command_buffer_finish(&cmdbuf->vk);
   vk_free(&device->vk.alloc, cmdbuf);
}
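
/* Each command buffer owns three upload pools: the descriptor pool must stay
 * CPU-visible so descriptors can be written at record time, while the TLS and
 * varying pools are normally GPU-only (PAN_BO_INVISIBLE), unless
 * PANVK_DEBUG_DUMP asks for mappable BOs (see panvk_debug_adjust_bo_flags()).
 */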
static VkResult
panvk_create_cmdbuf(struct panvk_device *device,
                    struct panvk_cmd_pool *pool,
                    VkCommandBufferLevel level,
                    struct panvk_cmd_buffer **cmdbuf_out)
{
   struct panvk_cmd_buffer *cmdbuf;

   cmdbuf = vk_zalloc(&device->vk.alloc, sizeof(*cmdbuf),
                      8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!cmdbuf)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk, level);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, cmdbuf);
      return result;
   }

   cmdbuf->device = device;
   cmdbuf->pool = pool;

   if (pool) {
      list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);
      cmdbuf->queue_family_index = pool->vk.queue_family_index;
   } else {
      /* Init the pool_link so we can safely call list_del when we destroy
       * the command buffer.
       */
      list_inithead(&cmdbuf->pool_link);
      cmdbuf->queue_family_index = PANVK_QUEUE_GENERAL;
   }

   panvk_pool_init(&cmdbuf->desc_pool, &device->physical_device->pdev,
                   pool ? &pool->desc_bo_pool : NULL, 0, 64 * 1024,
                   "Command buffer descriptor pool", true);
   panvk_pool_init(&cmdbuf->tls_pool, &device->physical_device->pdev,
                   pool ? &pool->tls_bo_pool : NULL,
                   panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE),
                   64 * 1024, "TLS pool", false);
   panvk_pool_init(&cmdbuf->varying_pool, &device->physical_device->pdev,
                   pool ? &pool->varying_bo_pool : NULL,
                   panvk_debug_adjust_bo_flags(device, PAN_BO_INVISIBLE),
                   64 * 1024, "Varyings pool", false);
   list_inithead(&cmdbuf->batches);
   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_INITIAL;
   *cmdbuf_out = cmdbuf;
   return VK_SUCCESS;
}

VkResult
panvk_per_arch(AllocateCommandBuffers)(VkDevice _device,
                                       const VkCommandBufferAllocateInfo *pAllocateInfo,
                                       VkCommandBuffer *pCommandBuffers)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, pAllocateInfo->commandPool);

   VkResult result = VK_SUCCESS;
   unsigned i;

   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
      struct panvk_cmd_buffer *cmdbuf = NULL;

      if (!list_is_empty(&pool->free_cmd_buffers)) {
         cmdbuf = list_first_entry(
            &pool->free_cmd_buffers, struct panvk_cmd_buffer, pool_link);

         list_del(&cmdbuf->pool_link);
         list_addtail(&cmdbuf->pool_link, &pool->active_cmd_buffers);

         vk_command_buffer_finish(&cmdbuf->vk);
         result = vk_command_buffer_init(&cmdbuf->vk, &pool->vk, pAllocateInfo->level);
      } else {
         result = panvk_create_cmdbuf(device, pool, pAllocateInfo->level, &cmdbuf);
      }

      if (result != VK_SUCCESS)
         goto err_free_cmd_bufs;

      pCommandBuffers[i] = panvk_cmd_buffer_to_handle(cmdbuf);
   }

   return VK_SUCCESS;

err_free_cmd_bufs:
   panvk_per_arch(FreeCommandBuffers)(_device, pAllocateInfo->commandPool, i,
                                      pCommandBuffers);
   for (unsigned j = 0; j < i; j++)
      pCommandBuffers[j] = VK_NULL_HANDLE;

   return result;
}

void
panvk_per_arch(FreeCommandBuffers)(VkDevice device,
                                   VkCommandPool commandPool,
                                   uint32_t commandBufferCount,
                                   const VkCommandBuffer *pCommandBuffers)
{
   for (uint32_t i = 0; i < commandBufferCount; i++) {
      VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, pCommandBuffers[i]);

      if (cmdbuf) {
         if (cmdbuf->pool) {
            list_del(&cmdbuf->pool_link);
            panvk_reset_cmdbuf(cmdbuf);
            list_addtail(&cmdbuf->pool_link,
                         &cmdbuf->pool->free_cmd_buffers);
         } else
            panvk_destroy_cmdbuf(cmdbuf);
      }
   }
}

VkResult
panvk_per_arch(ResetCommandBuffer)(VkCommandBuffer commandBuffer,
                                   VkCommandBufferResetFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);

   return panvk_reset_cmdbuf(cmdbuf);
}

VkResult
panvk_per_arch(BeginCommandBuffer)(VkCommandBuffer commandBuffer,
                                   const VkCommandBufferBeginInfo *pBeginInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VkResult result = VK_SUCCESS;

   if (cmdbuf->status != PANVK_CMD_BUFFER_STATUS_INITIAL) {
      /* If the command buffer has already been reset with
       * vkResetCommandBuffer, no need to do it again.
       */
      result = panvk_reset_cmdbuf(cmdbuf);
      if (result != VK_SUCCESS)
         return result;
   }

   memset(&cmdbuf->state, 0, sizeof(cmdbuf->state));

   cmdbuf->status = PANVK_CMD_BUFFER_STATUS_RECORDING;

   return VK_SUCCESS;
}

void
panvk_per_arch(DestroyCommandPool)(VkDevice _device,
                                   VkCommandPool commandPool,
                                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(panvk_device, device, _device);
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->active_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->free_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);

   panvk_bo_pool_cleanup(&pool->desc_bo_pool);
   panvk_bo_pool_cleanup(&pool->varying_bo_pool);
   panvk_bo_pool_cleanup(&pool->tls_bo_pool);

   vk_command_pool_finish(&pool->vk);
   vk_free2(&device->vk.alloc, pAllocator, pool);
}

VkResult
panvk_per_arch(ResetCommandPool)(VkDevice device,
                                 VkCommandPool commandPool,
                                 VkCommandPoolResetFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);
   VkResult result;

   list_for_each_entry(struct panvk_cmd_buffer, cmdbuf, &pool->active_cmd_buffers,
                       pool_link)
   {
      result = panvk_reset_cmdbuf(cmdbuf);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

void
panvk_per_arch(TrimCommandPool)(VkDevice device,
                                VkCommandPool commandPool,
                                VkCommandPoolTrimFlags flags)
{
   VK_FROM_HANDLE(panvk_cmd_pool, pool, commandPool);

   if (!pool)
      return;

   list_for_each_entry_safe(struct panvk_cmd_buffer, cmdbuf,
                            &pool->free_cmd_buffers, pool_link)
      panvk_destroy_cmdbuf(cmdbuf);
}

void
panvk_per_arch(CmdDispatch)(VkCommandBuffer commandBuffer,
                            uint32_t x,
                            uint32_t y,
                            uint32_t z)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   const struct panfrost_device *pdev =
      &cmdbuf->device->physical_device->pdev;
   struct panvk_dispatch_info dispatch = {
      .wg_count = { x, y, z },
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct panvk_cmd_bind_point_state *bind_point_state =
      panvk_cmd_get_bind_point_state(cmdbuf, COMPUTE);
   struct panvk_descriptor_state *desc_state = &bind_point_state->desc_state;
   const struct panvk_pipeline *pipeline = bind_point_state->pipeline;
   struct panfrost_ptr job =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, COMPUTE_JOB);
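   /* Fill the dispatch sysvals and clear sysvals_ptr so the updated values
    * get re-uploaded by panvk_cmd_prepare_ubos() below.
    */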
   struct panvk_sysvals *sysvals = &desc_state->sysvals;
   sysvals->num_work_groups.u32[0] = x;
   sysvals->num_work_groups.u32[1] = y;
   sysvals->num_work_groups.u32[2] = z;
   sysvals->local_group_size.u32[0] = pipeline->cs.local_size.x;
   sysvals->local_group_size.u32[1] = pipeline->cs.local_size.y;
   sysvals->local_group_size.u32[2] = pipeline->cs.local_size.z;
   desc_state->sysvals_ptr = 0;

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
   dispatch.tsd = batch->tls.gpu;

   panvk_prepare_non_vs_attribs(cmdbuf, bind_point_state);
   dispatch.attributes = desc_state->non_vs_attribs;
   dispatch.attribute_bufs = desc_state->non_vs_attrib_bufs;

   panvk_cmd_prepare_ubos(cmdbuf, bind_point_state);
   dispatch.ubos = desc_state->ubos;

   panvk_cmd_prepare_textures(cmdbuf, bind_point_state);
   dispatch.textures = desc_state->textures;

   panvk_cmd_prepare_samplers(cmdbuf, bind_point_state);
   dispatch.samplers = desc_state->samplers;

   panvk_per_arch(emit_compute_job)(pipeline, &dispatch, job.cpu);
   panfrost_add_job(&cmdbuf->desc_pool.base, &batch->scoreboard,
                    MALI_JOB_TYPE_COMPUTE, false, false, 0, 0,
                    &job, false);

   batch->tlsinfo.tls.size = pipeline->tls_size;
   batch->tlsinfo.wls.size = pipeline->wls_size;
   if (batch->tlsinfo.wls.size) {
      batch->wls_total_size =
         pan_wls_mem_size(pdev, &dispatch.wg_count, batch->tlsinfo.wls.size);
   }

   panvk_per_arch(cmd_close_batch)(cmdbuf);
   desc_state->dirty = 0;
}