/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Based in part on v3d driver which is:
 *
 * Copyright © 2014-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

/* The only version-specific structure that we need is
 * TMU_CONFIG_PARAMETER_1. This didn't seem to change significantly from
 * previous V3D versions and we don't expect that to change, so for now let's
 * just hardcode the V3D version here.
 */
#define V3D_VERSION 41
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"

/* Our Vulkan resource indices represent indices in descriptor maps which
 * include all shader stages, so we need to size the arrays below
 * accordingly. For now we only support a maximum of 3 stages: VS, GS, FS.
 */
#define MAX_STAGES 3

#define MAX_TOTAL_TEXTURE_SAMPLERS (V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct texture_bo_list {
   struct v3dv_bo *tex[MAX_TOTAL_TEXTURE_SAMPLERS];
};

/* This tracks state BOs for both textures and samplers, so we
 * multiply by 2.
 */
#define MAX_TOTAL_STATES (2 * V3D_MAX_TEXTURE_SAMPLERS * MAX_STAGES)
struct state_bo_list {
   uint32_t count;
   struct v3dv_bo *states[MAX_TOTAL_STATES];
};

#define MAX_TOTAL_UNIFORM_BUFFERS ((MAX_UNIFORM_BUFFERS + \
                                    MAX_INLINE_UNIFORM_BUFFERS) * MAX_STAGES)
#define MAX_TOTAL_STORAGE_BUFFERS (MAX_STORAGE_BUFFERS * MAX_STAGES)
struct buffer_bo_list {
   struct v3dv_bo *ubo[MAX_TOTAL_UNIFORM_BUFFERS];
   struct v3dv_bo *ssbo[MAX_TOTAL_STORAGE_BUFFERS];
};

static bool
state_bo_in_list(struct state_bo_list *list, struct v3dv_bo *bo)
{
   for (int i = 0; i < list->count; i++) {
      if (list->states[i] == bo)
         return true;
   }
   return false;
}

static void
push_constants_bo_free(VkDevice _device,
                       uint64_t bo_ptr,
                       VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   v3dv_bo_free(device, (struct v3dv_bo *)(uintptr_t) bo_ptr);
}

/*
 * This method checks whether the UBO used for push constants needs to be
 * updated.
 *
 * The push constants UBO is only used for push constants accessed with a
 * non-const index.
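 *
 * Push constants accessed with a constant index don't need it: those are
 * emitted directly as QUNIFORM_UNIFORM values read from
 * cmd_buffer->state.push_constants_data (see v3dv_write_uniforms_wg_offsets).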
 */
static void
check_push_constants_ubo(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_pipeline *pipeline)
{
   if (!(cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO) ||
       pipeline->layout->push_constant_size == 0)
      return;

   if (cmd_buffer->push_constants_resource.bo == NULL) {
      cmd_buffer->push_constants_resource.bo =
         v3dv_bo_alloc(cmd_buffer->device, 4096, "push constants", true);

      v3dv_job_add_bo(cmd_buffer->state.job,
                      cmd_buffer->push_constants_resource.bo);

      if (!cmd_buffer->push_constants_resource.bo) {
         fprintf(stderr, "Failed to allocate memory for push constants\n");
         abort();
      }

      bool ok = v3dv_bo_map(cmd_buffer->device,
                            cmd_buffer->push_constants_resource.bo,
                            cmd_buffer->push_constants_resource.bo->size);
      if (!ok) {
         fprintf(stderr, "failed to map push constants buffer\n");
         abort();
      }
   } else {
      if (cmd_buffer->push_constants_resource.offset +
          cmd_buffer->state.push_constants_size <=
          cmd_buffer->push_constants_resource.bo->size) {
         cmd_buffer->push_constants_resource.offset +=
            cmd_buffer->state.push_constants_size;
      } else {
         /* We ran out of space so we'll have to allocate a new buffer but we
          * need to ensure the old one is preserved until the end of the
          * command buffer life and make sure it is eventually freed. We use
          * the private object machinery in the command buffer for this.
          */
         v3dv_cmd_buffer_add_private_obj(
            cmd_buffer, (uintptr_t) cmd_buffer->push_constants_resource.bo,
            (v3dv_cmd_buffer_private_obj_destroy_cb) push_constants_bo_free);

         /* Now call back so we create a new BO */
         cmd_buffer->push_constants_resource.bo = NULL;
         check_push_constants_ubo(cmd_buffer, pipeline);
         return;
      }
   }

   assert(cmd_buffer->state.push_constants_size <= MAX_PUSH_CONSTANTS_SIZE);
   memcpy(cmd_buffer->push_constants_resource.bo->map +
          cmd_buffer->push_constants_resource.offset,
          cmd_buffer->state.push_constants_data,
          cmd_buffer->state.push_constants_size);

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_PUSH_CONSTANTS_UBO;
}

/** V3D 4.x TMU configuration parameter 0 (texture) */
static void
write_tmu_p0(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct texture_bo_list *tex_bos,
             struct state_bo_list *state_bos)
{
   uint32_t texture_idx = v3d_unit_data_get_unit(data);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   /* We need to ensure that the texture bo is added to the job */
   struct v3dv_bo *texture_bo =
      v3dv_descriptor_map_get_texture_bo(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout, texture_idx);
   assert(texture_bo);
   assert(texture_idx < V3D_MAX_TEXTURE_SAMPLERS);
   tex_bos->tex[texture_idx] = texture_bo;

   struct v3dv_cl_reloc state_reloc =
      v3dv_descriptor_map_get_texture_shader_state(cmd_buffer->device, descriptor_state,
                                                   &pipeline->shared_data->maps[stage]->texture_map,
                                                   pipeline->layout,
                                                   texture_idx);

   cl_aligned_u32(uniforms, state_reloc.bo->offset +
                            state_reloc.offset +
                            v3d_unit_data_get_offset(data));

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = state_reloc.bo;
   }
}

/** V3D 4.x TMU configuration parameter 1 (sampler) */
static void
write_tmu_p1(struct v3dv_cmd_buffer *cmd_buffer,
             struct v3dv_pipeline *pipeline,
             enum broadcom_shader_stage stage,
             struct v3dv_cl_out **uniforms,
             uint32_t data,
             struct state_bo_list *state_bos)
{
   uint32_t sampler_idx = v3d_unit_data_get_unit(data);
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   assert(sampler_idx != V3DV_NO_SAMPLER_16BIT_IDX &&
          sampler_idx != V3DV_NO_SAMPLER_32BIT_IDX);

   struct v3dv_cl_reloc sampler_state_reloc =
      v3dv_descriptor_map_get_sampler_state(cmd_buffer->device, descriptor_state,
                                            &pipeline->shared_data->maps[stage]->sampler_map,
                                            pipeline->layout, sampler_idx);

   const struct v3dv_sampler *sampler =
      v3dv_descriptor_map_get_sampler(descriptor_state,
                                      &pipeline->shared_data->maps[stage]->sampler_map,
                                      pipeline->layout, sampler_idx);
   assert(sampler);

   /* Set unnormalized coordinates flag from sampler object */
   uint32_t p1_packed = v3d_unit_data_get_offset(data);
   if (sampler->unnormalized_coordinates) {
      struct V3DX(TMU_CONFIG_PARAMETER_1) p1_unpacked;
      V3DX(TMU_CONFIG_PARAMETER_1_unpack)((uint8_t *)&p1_packed, &p1_unpacked);
      p1_unpacked.unnormalized_coordinates = true;
      V3DX(TMU_CONFIG_PARAMETER_1_pack)(NULL, (uint8_t *)&p1_packed,
                                        &p1_unpacked);
   }

   cl_aligned_u32(uniforms, sampler_state_reloc.bo->offset +
                            sampler_state_reloc.offset +
                            p1_packed);

   /* Texture and Sampler states are typically suballocated, so they are
    * usually the same BO: only flag them once to avoid trying to add them
    * multiple times to the job later.
    */
   if (!state_bo_in_list(state_bos, sampler_state_reloc.bo)) {
      assert(state_bos->count < 2 * V3D_MAX_TEXTURE_SAMPLERS);
      state_bos->states[state_bos->count++] = sampler_state_reloc.bo;
   }
}

static void
write_ubo_ssbo_uniforms(struct v3dv_cmd_buffer *cmd_buffer,
                        struct v3dv_pipeline *pipeline,
                        enum broadcom_shader_stage stage,
                        struct v3dv_cl_out **uniforms,
                        enum quniform_contents content,
                        uint32_t data,
                        struct buffer_bo_list *buffer_bos)
{
   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      content == QUNIFORM_UBO_ADDR || content == QUNIFORM_GET_UBO_SIZE ?
      &pipeline->shared_data->maps[stage]->ubo_map :
      &pipeline->shared_data->maps[stage]->ssbo_map;

   uint32_t offset =
      content == QUNIFORM_UBO_ADDR ?
      v3d_unit_data_get_offset(data) :
      0;

   uint32_t dynamic_offset = 0;

   /* For ubos, index is shifted, as 0 is reserved for push constants
    * and 1..MAX_INLINE_UNIFORM_BUFFERS are reserved for inline uniform
    * buffers.
    */
   uint32_t index = v3d_unit_data_get_unit(data);
   if (content == QUNIFORM_UBO_ADDR && index == 0) {
      /* Ensure the push constants UBO is created and updated. This also
       * adds the BO to the job so we don't need to track it in buffer_bos.
       */
      check_push_constants_ubo(cmd_buffer, pipeline);

      struct v3dv_cl_reloc *resource =
         &cmd_buffer->push_constants_resource;
      assert(resource->bo);

      cl_aligned_u32(uniforms, resource->bo->offset +
                               resource->offset +
                               offset + dynamic_offset);
   } else {
      if (content == QUNIFORM_UBO_ADDR) {
         /* We reserve index 0 for push constants and artificially increase
          * our indices by one for that reason, fix that now before accessing
          * the descriptor map.
          */
         assert(index > 0);
         index--;
      } else {
         index = data;
      }

      struct v3dv_descriptor *descriptor =
         v3dv_descriptor_map_get_descriptor(descriptor_state, map,
                                            pipeline->layout,
                                            index, &dynamic_offset);

      /* Inline UBO descriptors store UBO data in descriptor pool memory,
       * instead of an external buffer.
       */
      assert(descriptor);

      if (content == QUNIFORM_GET_SSBO_SIZE ||
          content == QUNIFORM_GET_UBO_SIZE) {
         cl_aligned_u32(uniforms, descriptor->range);
      } else {
         /* Inline uniform buffers store their contents in pool memory instead
          * of an external buffer.
          */
         struct v3dv_bo *bo;
         uint32_t addr;
         if (descriptor->type == VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK) {
            assert(dynamic_offset == 0);
            struct v3dv_cl_reloc reloc =
               v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                                     descriptor_state, map,
                                                     pipeline->layout, index,
                                                     NULL);
            bo = reloc.bo;
            addr = reloc.bo->offset + reloc.offset + offset;
         } else {
            assert(descriptor->buffer);
            assert(descriptor->buffer->mem);
            assert(descriptor->buffer->mem->bo);

            bo = descriptor->buffer->mem->bo;
            addr = bo->offset +
                   descriptor->buffer->mem_offset +
                   descriptor->offset +
                   offset + dynamic_offset;
         }

         cl_aligned_u32(uniforms, addr);

         if (content == QUNIFORM_UBO_ADDR) {
            assert(index < MAX_TOTAL_UNIFORM_BUFFERS);
            buffer_bos->ubo[index] = bo;
         } else {
            assert(index < MAX_TOTAL_STORAGE_BUFFERS);
            buffer_bos->ssbo[index] = bo;
         }
      }
   }
}

static void
write_inline_uniform(struct v3dv_cl_out **uniforms,
                     uint32_t index,
                     uint32_t offset,
                     struct v3dv_cmd_buffer *cmd_buffer,
                     struct v3dv_pipeline *pipeline,
                     enum broadcom_shader_stage stage)
{
   assert(index < MAX_INLINE_UNIFORM_BUFFERS);

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor_map *map =
      &pipeline->shared_data->maps[stage]->ubo_map;

   struct v3dv_cl_reloc reloc =
      v3dv_descriptor_map_get_descriptor_bo(cmd_buffer->device,
                                            descriptor_state, map,
                                            pipeline->layout, index,
                                            NULL);

   /* Offset comes in 32-bit units */
   uint32_t *addr = reloc.bo->map + reloc.offset + 4 * offset;
   cl_aligned_u32(uniforms, *addr);
}

static uint32_t
get_texture_size_from_image_view(struct v3dv_image_view *image_view,
                                 enum quniform_contents contents,
                                 uint32_t data)
{
   switch (contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      /* We don't u_minify the values, as we are using the image_view
       * extents.
       */
      return image_view->vk.extent.width;
   case QUNIFORM_IMAGE_HEIGHT:
   case QUNIFORM_TEXTURE_HEIGHT:
      return image_view->vk.extent.height;
   case QUNIFORM_IMAGE_DEPTH:
   case QUNIFORM_TEXTURE_DEPTH:
      return image_view->vk.extent.depth;
   case QUNIFORM_IMAGE_ARRAY_SIZE:
   case QUNIFORM_TEXTURE_ARRAY_SIZE:
      if (image_view->vk.view_type != VK_IMAGE_VIEW_TYPE_CUBE_ARRAY) {
         return image_view->vk.layer_count;
      } else {
         assert(image_view->vk.layer_count % 6 == 0);
         return image_view->vk.layer_count / 6;
      }
   case QUNIFORM_TEXTURE_LEVELS:
      return image_view->vk.level_count;
   case QUNIFORM_TEXTURE_SAMPLES:
      assert(image_view->vk.image);
      return image_view->vk.image->samples;
   default:
      unreachable("Bad texture size field");
   }
}

static uint32_t
get_texture_size_from_buffer_view(struct v3dv_buffer_view *buffer_view,
                                  enum quniform_contents contents,
                                  uint32_t data)
{
   switch (contents) {
   case QUNIFORM_IMAGE_WIDTH:
   case QUNIFORM_TEXTURE_WIDTH:
      return buffer_view->num_elements;
   /* Only size can be queried for texel buffers */
   default:
      unreachable("Bad texture size field for texel buffers");
   }
}

static uint32_t
get_texture_size(struct v3dv_cmd_buffer *cmd_buffer,
                 struct v3dv_pipeline *pipeline,
                 enum broadcom_shader_stage stage,
                 enum quniform_contents contents,
                 uint32_t data)
{
   uint32_t texture_idx = data;

   struct v3dv_descriptor_state *descriptor_state =
      v3dv_cmd_buffer_get_descriptor_state(cmd_buffer, pipeline);

   struct v3dv_descriptor *descriptor =
      v3dv_descriptor_map_get_descriptor(descriptor_state,
                                         &pipeline->shared_data->maps[stage]->texture_map,
                                         pipeline->layout,
                                         texture_idx, NULL);

   assert(descriptor);

   switch (descriptor->type) {
   case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
   case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
   case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
   case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
      return get_texture_size_from_image_view(descriptor->image_view,
                                              contents, data);
   case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
   case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
      return get_texture_size_from_buffer_view(descriptor->buffer_view,
                                               contents, data);
   default:
      unreachable("Wrong descriptor for getting texture size");
   }
}

struct v3dv_cl_reloc
v3dv_write_uniforms_wg_offsets(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_pipeline *pipeline,
                               struct v3dv_shader_variant *variant,
                               uint32_t **wg_count_offsets)
{
   struct v3d_uniform_list *uinfo =
      &variant->prog_data.base->uniforms;
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);
   assert(job->cmd_buffer == cmd_buffer);

   struct texture_bo_list tex_bos = { 0 };
   struct state_bo_list state_bos = { 0 };
   struct buffer_bo_list buffer_bos = { 0 };

   /* The hardware always pre-fetches the next uniform (also when there
    * aren't any), so we always allocate space for an extra slot. This
    * fixes MMU exceptions reported since Linux kernel 5.4 when the
    * uniforms fill up the tail bytes of a page in the indirect
    * BO. In that scenario, when the hardware pre-fetches after reading
    * the last uniform it will read beyond the end of the page and trigger
    * the MMU exception.
    */
   v3dv_cl_ensure_space(&job->indirect, (uinfo->count + 1) * 4, 4);

   struct v3dv_cl_reloc uniform_stream = v3dv_cl_get_address(&job->indirect);

   struct v3dv_cl_out *uniforms = cl_start(&job->indirect);

   for (int i = 0; i < uinfo->count; i++) {
      uint32_t data = uinfo->data[i];

      switch (uinfo->contents[i]) {
      case QUNIFORM_CONSTANT:
         cl_aligned_u32(&uniforms, data);
         break;

      case QUNIFORM_UNIFORM:
         cl_aligned_u32(&uniforms, cmd_buffer->state.push_constants_data[data]);
         break;

      case QUNIFORM_INLINE_UBO_0:
      case QUNIFORM_INLINE_UBO_1:
      case QUNIFORM_INLINE_UBO_2:
      case QUNIFORM_INLINE_UBO_3:
         write_inline_uniform(&uniforms,
                              uinfo->contents[i] - QUNIFORM_INLINE_UBO_0, data,
                              cmd_buffer, pipeline, variant->stage);
         break;

      case QUNIFORM_VIEWPORT_X_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][0] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Y_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][1] * 256.0f);
         break;

      case QUNIFORM_VIEWPORT_Z_OFFSET:
         cl_aligned_f(&uniforms, dynamic->viewport.translate[0][2]);
         break;

      case QUNIFORM_VIEWPORT_Z_SCALE:
         cl_aligned_f(&uniforms, dynamic->viewport.scale[0][2]);
         break;

      case QUNIFORM_SSBO_OFFSET:
      case QUNIFORM_UBO_ADDR:
      case QUNIFORM_GET_SSBO_SIZE:
      case QUNIFORM_GET_UBO_SIZE:
         write_ubo_ssbo_uniforms(cmd_buffer, pipeline, variant->stage, &uniforms,
                                 uinfo->contents[i], data, &buffer_bos);
         break;

      case QUNIFORM_IMAGE_TMU_CONFIG_P0:
      case QUNIFORM_TMU_CONFIG_P0:
         write_tmu_p0(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &tex_bos, &state_bos);
         break;

      case QUNIFORM_TMU_CONFIG_P1:
         write_tmu_p1(cmd_buffer, pipeline, variant->stage,
                      &uniforms, data, &state_bos);
         break;

      case QUNIFORM_IMAGE_WIDTH:
      case QUNIFORM_IMAGE_HEIGHT:
      case QUNIFORM_IMAGE_DEPTH:
      case QUNIFORM_IMAGE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_WIDTH:
      case QUNIFORM_TEXTURE_HEIGHT:
      case QUNIFORM_TEXTURE_DEPTH:
      case QUNIFORM_TEXTURE_ARRAY_SIZE:
      case QUNIFORM_TEXTURE_LEVELS:
      case QUNIFORM_TEXTURE_SAMPLES:
         cl_aligned_u32(&uniforms,
                        get_texture_size(cmd_buffer,
                                         pipeline,
                                         variant->stage,
                                         uinfo->contents[i],
                                         data));
         break;

      /* We generate this from geometry shaders to cap the generated gl_Layer
       * to be within the number of layers of the framebuffer so we prevent
       * the binner from trying to access tile state memory out of bounds (for
       * layers that don't exist).
       *
       * Unfortunately, for secondary command buffers we may not know the
       * number of layers in the framebuffer at this stage. Since we are
       * only using this to sanitize the shader and it should not have any
       * impact on correct shaders that emit valid values for gl_Layer,
       * we just work around it by using the largest number of layers we
       * support.
       *
       * FIXME: we could do better than this by recording in the job that
       * the value at this uniform offset is not correct, and patch it when
       * we execute the secondary command buffer into a primary, since we do
       * have the correct number of layers at that point, but again, since
       * this is only for sanitizing the shader and it only affects the
       * specific case of secondary command buffers without framebuffer info
       * available it might not be worth the trouble.
595 * 596 * With multiview the number of layers is dictated by the view mask 597 * and not by the framebuffer layers. We do set the job's frame tiling 598 * information correctly from the view mask in that case, however, 599 * secondary command buffers may not have valid frame tiling data, 600 * so when multiview is enabled, we always set the number of layers 601 * from the subpass view mask. 602 */ 603 case QUNIFORM_FB_LAYERS: { 604 const struct v3dv_cmd_buffer_state *state = &job->cmd_buffer->state; 605 const uint32_t view_mask = 606 state->pass->subpasses[state->subpass_idx].view_mask; 607 608 uint32_t num_layers; 609 if (view_mask != 0) { 610 num_layers = util_last_bit(view_mask); 611 } else if (job->frame_tiling.layers != 0) { 612 num_layers = job->frame_tiling.layers; 613 } else if (cmd_buffer->state.framebuffer) { 614 num_layers = cmd_buffer->state.framebuffer->layers; 615 } else { 616 assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); 617 num_layers = 2048; 618#if DEBUG 619 fprintf(stderr, "Skipping gl_LayerID shader sanity check for " 620 "secondary command buffer\n"); 621#endif 622 } 623 cl_aligned_u32(&uniforms, num_layers); 624 break; 625 } 626 627 case QUNIFORM_VIEW_INDEX: 628 cl_aligned_u32(&uniforms, job->cmd_buffer->state.view_index); 629 break; 630 631 case QUNIFORM_NUM_WORK_GROUPS: 632 assert(job->type == V3DV_JOB_TYPE_GPU_CSD); 633 assert(job->csd.wg_count[data] > 0); 634 if (wg_count_offsets) 635 wg_count_offsets[data] = (uint32_t *) uniforms; 636 cl_aligned_u32(&uniforms, job->csd.wg_count[data]); 637 break; 638 639 case QUNIFORM_WORK_GROUP_BASE: 640 assert(job->type == V3DV_JOB_TYPE_GPU_CSD); 641 cl_aligned_u32(&uniforms, job->csd.wg_base[data]); 642 break; 643 644 case QUNIFORM_SHARED_OFFSET: 645 assert(job->type == V3DV_JOB_TYPE_GPU_CSD); 646 assert(job->csd.shared_memory); 647 cl_aligned_u32(&uniforms, job->csd.shared_memory->offset); 648 break; 649 650 case QUNIFORM_SPILL_OFFSET: 651 assert(pipeline->spill.bo); 652 cl_aligned_u32(&uniforms, pipeline->spill.bo->offset); 653 break; 654 655 case QUNIFORM_SPILL_SIZE_PER_THREAD: 656 assert(pipeline->spill.size_per_thread > 0); 657 cl_aligned_u32(&uniforms, pipeline->spill.size_per_thread); 658 break; 659 660 default: 661 unreachable("unsupported quniform_contents uniform type\n"); 662 } 663 } 664 665 cl_end(&job->indirect, uniforms); 666 667 for (int i = 0; i < MAX_TOTAL_TEXTURE_SAMPLERS; i++) { 668 if (tex_bos.tex[i]) 669 v3dv_job_add_bo(job, tex_bos.tex[i]); 670 } 671 672 for (int i = 0; i < state_bos.count; i++) 673 v3dv_job_add_bo(job, state_bos.states[i]); 674 675 for (int i = 0; i < MAX_TOTAL_UNIFORM_BUFFERS; i++) { 676 if (buffer_bos.ubo[i]) 677 v3dv_job_add_bo(job, buffer_bos.ubo[i]); 678 } 679 680 for (int i = 0; i < MAX_TOTAL_STORAGE_BUFFERS; i++) { 681 if (buffer_bos.ssbo[i]) 682 v3dv_job_add_bo(job, buffer_bos.ssbo[i]); 683 } 684 685 if (job->csd.shared_memory) 686 v3dv_job_add_bo(job, job->csd.shared_memory); 687 688 if (pipeline->spill.bo) 689 v3dv_job_add_bo(job, pipeline->spill.bo); 690 691 return uniform_stream; 692} 693 694struct v3dv_cl_reloc 695v3dv_write_uniforms(struct v3dv_cmd_buffer *cmd_buffer, 696 struct v3dv_pipeline *pipeline, 697 struct v3dv_shader_variant *variant) 698{ 699 return v3dv_write_uniforms_wg_offsets(cmd_buffer, pipeline, variant, NULL); 700} 701