1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Raspberry Pi Ltd 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 
22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "v3dv_private.h" 25bf215546Sopenharmony_ci#include "broadcom/common/v3d_macros.h" 26bf215546Sopenharmony_ci#include "broadcom/common/v3d_util.h" 27bf215546Sopenharmony_ci#include "broadcom/cle/v3dx_pack.h" 28bf215546Sopenharmony_ci#include "broadcom/compiler/v3d_compiler.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "util/half_float.h" 31bf215546Sopenharmony_ci#include "vulkan/util/vk_format.h" 32bf215546Sopenharmony_ci#include "util/u_pack_color.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_civoid 35bf215546Sopenharmony_civ3dX(job_emit_binning_flush)(struct v3dv_job *job) 36bf215546Sopenharmony_ci{ 37bf215546Sopenharmony_ci assert(job); 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(FLUSH)); 40bf215546Sopenharmony_ci v3dv_return_if_oom(NULL, job); 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci cl_emit(&job->bcl, FLUSH, flush); 43bf215546Sopenharmony_ci} 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_civoid 46bf215546Sopenharmony_civ3dX(job_emit_binning_prolog)(struct v3dv_job *job, 47bf215546Sopenharmony_ci const struct v3dv_frame_tiling *tiling, 48bf215546Sopenharmony_ci uint32_t layers) 49bf215546Sopenharmony_ci{ 50bf215546Sopenharmony_ci /* This must go before the binning mode configuration. It is 51bf215546Sopenharmony_ci * required for layered framebuffers to work. 
52bf215546Sopenharmony_ci */ 53bf215546Sopenharmony_ci cl_emit(&job->bcl, NUMBER_OF_LAYERS, config) { 54bf215546Sopenharmony_ci config.number_of_layers = layers; 55bf215546Sopenharmony_ci } 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci assert(!tiling->double_buffer || !tiling->msaa); 58bf215546Sopenharmony_ci cl_emit(&job->bcl, TILE_BINNING_MODE_CFG, config) { 59bf215546Sopenharmony_ci config.width_in_pixels = tiling->width; 60bf215546Sopenharmony_ci config.height_in_pixels = tiling->height; 61bf215546Sopenharmony_ci config.number_of_render_targets = MAX2(tiling->render_target_count, 1); 62bf215546Sopenharmony_ci config.multisample_mode_4x = tiling->msaa; 63bf215546Sopenharmony_ci config.double_buffer_in_non_ms_mode = tiling->double_buffer; 64bf215546Sopenharmony_ci config.maximum_bpp_of_all_render_targets = tiling->internal_bpp; 65bf215546Sopenharmony_ci } 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_ci /* There's definitely nothing in the VCD cache we want. */ 68bf215546Sopenharmony_ci cl_emit(&job->bcl, FLUSH_VCD_CACHE, bin); 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci /* "Binning mode lists must have a Start Tile Binning item (6) after 71bf215546Sopenharmony_ci * any prefix state data before the binning list proper starts." 
72bf215546Sopenharmony_ci */ 73bf215546Sopenharmony_ci cl_emit(&job->bcl, START_TILE_BINNING, bin); 74bf215546Sopenharmony_ci} 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_civoid 77bf215546Sopenharmony_civ3dX(cmd_buffer_end_render_pass_secondary)(struct v3dv_cmd_buffer *cmd_buffer) 78bf215546Sopenharmony_ci{ 79bf215546Sopenharmony_ci assert(cmd_buffer->state.job); 80bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&cmd_buffer->state.job->bcl, 81bf215546Sopenharmony_ci cl_packet_length(RETURN_FROM_SUB_LIST)); 82bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 83bf215546Sopenharmony_ci cl_emit(&cmd_buffer->state.job->bcl, RETURN_FROM_SUB_LIST, ret); 84bf215546Sopenharmony_ci} 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_civoid 87bf215546Sopenharmony_civ3dX(job_emit_clip_window)(struct v3dv_job *job, const VkRect2D *rect) 88bf215546Sopenharmony_ci{ 89bf215546Sopenharmony_ci assert(job); 90bf215546Sopenharmony_ci 91bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CLIP_WINDOW)); 92bf215546Sopenharmony_ci v3dv_return_if_oom(NULL, job); 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci cl_emit(&job->bcl, CLIP_WINDOW, clip) { 95bf215546Sopenharmony_ci clip.clip_window_left_pixel_coordinate = rect->offset.x; 96bf215546Sopenharmony_ci clip.clip_window_bottom_pixel_coordinate = rect->offset.y; 97bf215546Sopenharmony_ci clip.clip_window_width_in_pixels = rect->extent.width; 98bf215546Sopenharmony_ci clip.clip_window_height_in_pixels = rect->extent.height; 99bf215546Sopenharmony_ci } 100bf215546Sopenharmony_ci} 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_cistatic void 103bf215546Sopenharmony_cicmd_buffer_render_pass_emit_load(struct v3dv_cmd_buffer *cmd_buffer, 104bf215546Sopenharmony_ci struct v3dv_cl *cl, 105bf215546Sopenharmony_ci struct v3dv_image_view *iview, 106bf215546Sopenharmony_ci uint32_t layer, 107bf215546Sopenharmony_ci uint32_t buffer) 108bf215546Sopenharmony_ci{ 109bf215546Sopenharmony_ci 
const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; 110bf215546Sopenharmony_ci const struct v3d_resource_slice *slice = 111bf215546Sopenharmony_ci &image->slices[iview->vk.base_mip_level]; 112bf215546Sopenharmony_ci uint32_t layer_offset = 113bf215546Sopenharmony_ci v3dv_layer_offset(image, iview->vk.base_mip_level, 114bf215546Sopenharmony_ci iview->vk.base_array_layer + layer); 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) { 117bf215546Sopenharmony_ci load.buffer_to_load = buffer; 118bf215546Sopenharmony_ci load.address = v3dv_cl_address(image->mem->bo, layer_offset); 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci load.input_image_format = iview->format->rt_type; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci /* If we create an image view with only the stencil format, we 123bf215546Sopenharmony_ci * re-interpret the format as RGBA8_UINT, as it is want we want in 124bf215546Sopenharmony_ci * general (see CreateImageView). 125bf215546Sopenharmony_ci * 126bf215546Sopenharmony_ci * However, when we are loading/storing tiles from the ZSTENCIL tile 127bf215546Sopenharmony_ci * buffer, we need to use the underlying DS format. 
128bf215546Sopenharmony_ci */ 129bf215546Sopenharmony_ci if (buffer == ZSTENCIL && 130bf215546Sopenharmony_ci iview->format->rt_type == V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI) { 131bf215546Sopenharmony_ci assert(image->format->rt_type == V3D_OUTPUT_IMAGE_FORMAT_D24S8); 132bf215546Sopenharmony_ci load.input_image_format = image->format->rt_type; 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_ci load.r_b_swap = iview->swap_rb; 136bf215546Sopenharmony_ci load.channel_reverse = iview->channel_reverse; 137bf215546Sopenharmony_ci load.memory_format = slice->tiling; 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci if (slice->tiling == V3D_TILING_UIF_NO_XOR || 140bf215546Sopenharmony_ci slice->tiling == V3D_TILING_UIF_XOR) { 141bf215546Sopenharmony_ci load.height_in_ub_or_stride = 142bf215546Sopenharmony_ci slice->padded_height_of_output_image_in_uif_blocks; 143bf215546Sopenharmony_ci } else if (slice->tiling == V3D_TILING_RASTER) { 144bf215546Sopenharmony_ci load.height_in_ub_or_stride = slice->stride; 145bf215546Sopenharmony_ci } 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) 148bf215546Sopenharmony_ci load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 149bf215546Sopenharmony_ci else 150bf215546Sopenharmony_ci load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci} 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_cistatic bool 155bf215546Sopenharmony_cicheck_needs_load(const struct v3dv_cmd_buffer_state *state, 156bf215546Sopenharmony_ci VkImageAspectFlags aspect, 157bf215546Sopenharmony_ci uint32_t first_subpass_idx, 158bf215546Sopenharmony_ci VkAttachmentLoadOp load_op) 159bf215546Sopenharmony_ci{ 160bf215546Sopenharmony_ci /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are 161bf215546Sopenharmony_ci * testing does not exist in the image. 
162bf215546Sopenharmony_ci */ 163bf215546Sopenharmony_ci if (!aspect) 164bf215546Sopenharmony_ci return false; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci /* Attachment (or view) load operations apply on the first subpass that 167bf215546Sopenharmony_ci * uses the attachment (or view), otherwise we always need to load. 168bf215546Sopenharmony_ci */ 169bf215546Sopenharmony_ci if (state->job->first_subpass > first_subpass_idx) 170bf215546Sopenharmony_ci return true; 171bf215546Sopenharmony_ci 172bf215546Sopenharmony_ci /* If the job is continuing a subpass started in another job, we always 173bf215546Sopenharmony_ci * need to load. 174bf215546Sopenharmony_ci */ 175bf215546Sopenharmony_ci if (state->job->is_subpass_continue) 176bf215546Sopenharmony_ci return true; 177bf215546Sopenharmony_ci 178bf215546Sopenharmony_ci /* If the area is not aligned to tile boundaries, we always need to load */ 179bf215546Sopenharmony_ci if (!state->tile_aligned_render_area) 180bf215546Sopenharmony_ci return true; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci /* The attachment load operations must be LOAD */ 183bf215546Sopenharmony_ci return load_op == VK_ATTACHMENT_LOAD_OP_LOAD; 184bf215546Sopenharmony_ci} 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_cistatic inline uint32_t 187bf215546Sopenharmony_civ3dv_zs_buffer(bool depth, bool stencil) 188bf215546Sopenharmony_ci{ 189bf215546Sopenharmony_ci if (depth && stencil) 190bf215546Sopenharmony_ci return ZSTENCIL; 191bf215546Sopenharmony_ci else if (depth) 192bf215546Sopenharmony_ci return Z; 193bf215546Sopenharmony_ci else if (stencil) 194bf215546Sopenharmony_ci return STENCIL; 195bf215546Sopenharmony_ci return NONE; 196bf215546Sopenharmony_ci} 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_cistatic void 199bf215546Sopenharmony_cicmd_buffer_render_pass_emit_loads(struct v3dv_cmd_buffer *cmd_buffer, 200bf215546Sopenharmony_ci struct v3dv_cl *cl, 201bf215546Sopenharmony_ci uint32_t layer) 
202bf215546Sopenharmony_ci{ 203bf215546Sopenharmony_ci const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 204bf215546Sopenharmony_ci const struct v3dv_render_pass *pass = state->pass; 205bf215546Sopenharmony_ci const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx]; 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; i++) { 210bf215546Sopenharmony_ci uint32_t attachment_idx = subpass->color_attachments[i].attachment; 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci if (attachment_idx == VK_ATTACHMENT_UNUSED) 213bf215546Sopenharmony_ci continue; 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci const struct v3dv_render_pass_attachment *attachment = 216bf215546Sopenharmony_ci &state->pass->attachments[attachment_idx]; 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci /* According to the Vulkan spec: 219bf215546Sopenharmony_ci * 220bf215546Sopenharmony_ci * "The load operation for each sample in an attachment happens before 221bf215546Sopenharmony_ci * any recorded command which accesses the sample in the first subpass 222bf215546Sopenharmony_ci * where the attachment is used." 223bf215546Sopenharmony_ci * 224bf215546Sopenharmony_ci * If the load operation is CLEAR, we must only clear once on the first 225bf215546Sopenharmony_ci * subpass that uses the attachment (and in that case we don't LOAD). 226bf215546Sopenharmony_ci * After that, we always want to load so we don't lose any rendering done 227bf215546Sopenharmony_ci * by a previous subpass to the same attachment. We also want to load 228bf215546Sopenharmony_ci * if the current job is continuing subpass work started by a previous 229bf215546Sopenharmony_ci * job, for the same reason. 
230bf215546Sopenharmony_ci * 231bf215546Sopenharmony_ci * If the render area is not aligned to tile boundaries then we have 232bf215546Sopenharmony_ci * tiles which are partially covered by it. In this case, we need to 233bf215546Sopenharmony_ci * load the tiles so we can preserve the pixels that are outside the 234bf215546Sopenharmony_ci * render area for any such tiles. 235bf215546Sopenharmony_ci */ 236bf215546Sopenharmony_ci uint32_t first_subpass = !pass->multiview_enabled ? 237bf215546Sopenharmony_ci attachment->first_subpass : 238bf215546Sopenharmony_ci attachment->views[layer].first_subpass; 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci bool needs_load = check_needs_load(state, 241bf215546Sopenharmony_ci VK_IMAGE_ASPECT_COLOR_BIT, 242bf215546Sopenharmony_ci first_subpass, 243bf215546Sopenharmony_ci attachment->desc.loadOp); 244bf215546Sopenharmony_ci if (needs_load) { 245bf215546Sopenharmony_ci struct v3dv_image_view *iview = 246bf215546Sopenharmony_ci state->attachments[attachment_idx].image_view; 247bf215546Sopenharmony_ci cmd_buffer_render_pass_emit_load(cmd_buffer, cl, iview, 248bf215546Sopenharmony_ci layer, RENDER_TARGET_0 + i); 249bf215546Sopenharmony_ci } 250bf215546Sopenharmony_ci } 251bf215546Sopenharmony_ci 252bf215546Sopenharmony_ci uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; 253bf215546Sopenharmony_ci if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { 254bf215546Sopenharmony_ci const struct v3dv_render_pass_attachment *ds_attachment = 255bf215546Sopenharmony_ci &state->pass->attachments[ds_attachment_idx]; 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci const VkImageAspectFlags ds_aspects = 258bf215546Sopenharmony_ci vk_format_aspects(ds_attachment->desc.format); 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci uint32_t ds_first_subpass = !pass->multiview_enabled ? 
261bf215546Sopenharmony_ci ds_attachment->first_subpass : 262bf215546Sopenharmony_ci ds_attachment->views[layer].first_subpass; 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci const bool needs_depth_load = 265bf215546Sopenharmony_ci check_needs_load(state, 266bf215546Sopenharmony_ci ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 267bf215546Sopenharmony_ci ds_first_subpass, 268bf215546Sopenharmony_ci ds_attachment->desc.loadOp); 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci const bool needs_stencil_load = 271bf215546Sopenharmony_ci check_needs_load(state, 272bf215546Sopenharmony_ci ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 273bf215546Sopenharmony_ci ds_first_subpass, 274bf215546Sopenharmony_ci ds_attachment->desc.stencilLoadOp); 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci if (needs_depth_load || needs_stencil_load) { 277bf215546Sopenharmony_ci struct v3dv_image_view *iview = 278bf215546Sopenharmony_ci state->attachments[ds_attachment_idx].image_view; 279bf215546Sopenharmony_ci /* From the Vulkan spec: 280bf215546Sopenharmony_ci * 281bf215546Sopenharmony_ci * "When an image view of a depth/stencil image is used as a 282bf215546Sopenharmony_ci * depth/stencil framebuffer attachment, the aspectMask is ignored 283bf215546Sopenharmony_ci * and both depth and stencil image subresources are used." 284bf215546Sopenharmony_ci * 285bf215546Sopenharmony_ci * So we ignore the aspects from the subresource range of the image 286bf215546Sopenharmony_ci * view for the depth/stencil attachment, but we still need to restrict 287bf215546Sopenharmony_ci * the to aspects compatible with the render pass and the image. 
288bf215546Sopenharmony_ci */ 289bf215546Sopenharmony_ci const uint32_t zs_buffer = 290bf215546Sopenharmony_ci v3dv_zs_buffer(needs_depth_load, needs_stencil_load); 291bf215546Sopenharmony_ci cmd_buffer_render_pass_emit_load(cmd_buffer, cl, 292bf215546Sopenharmony_ci iview, layer, zs_buffer); 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci } 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci cl_emit(cl, END_OF_LOADS, end); 297bf215546Sopenharmony_ci} 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_cistatic void 300bf215546Sopenharmony_cicmd_buffer_render_pass_emit_store(struct v3dv_cmd_buffer *cmd_buffer, 301bf215546Sopenharmony_ci struct v3dv_cl *cl, 302bf215546Sopenharmony_ci uint32_t attachment_idx, 303bf215546Sopenharmony_ci uint32_t layer, 304bf215546Sopenharmony_ci uint32_t buffer, 305bf215546Sopenharmony_ci bool clear, 306bf215546Sopenharmony_ci bool is_multisample_resolve) 307bf215546Sopenharmony_ci{ 308bf215546Sopenharmony_ci const struct v3dv_image_view *iview = 309bf215546Sopenharmony_ci cmd_buffer->state.attachments[attachment_idx].image_view; 310bf215546Sopenharmony_ci const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image; 311bf215546Sopenharmony_ci const struct v3d_resource_slice *slice = 312bf215546Sopenharmony_ci &image->slices[iview->vk.base_mip_level]; 313bf215546Sopenharmony_ci uint32_t layer_offset = v3dv_layer_offset(image, 314bf215546Sopenharmony_ci iview->vk.base_mip_level, 315bf215546Sopenharmony_ci iview->vk.base_array_layer + layer); 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 318bf215546Sopenharmony_ci store.buffer_to_store = buffer; 319bf215546Sopenharmony_ci store.address = v3dv_cl_address(image->mem->bo, layer_offset); 320bf215546Sopenharmony_ci store.clear_buffer_being_stored = clear; 321bf215546Sopenharmony_ci 322bf215546Sopenharmony_ci store.output_image_format = iview->format->rt_type; 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_ci /* If 
we create an image view with only the stencil format, we 325bf215546Sopenharmony_ci * re-interpret the format as RGBA8_UINT, as it is want we want in 326bf215546Sopenharmony_ci * general (see CreateImageView). 327bf215546Sopenharmony_ci * 328bf215546Sopenharmony_ci * However, when we are loading/storing tiles from the ZSTENCIL tile 329bf215546Sopenharmony_ci * buffer, we need to use the underlying DS format. 330bf215546Sopenharmony_ci */ 331bf215546Sopenharmony_ci if (buffer == ZSTENCIL && 332bf215546Sopenharmony_ci iview->format->rt_type == V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI) { 333bf215546Sopenharmony_ci assert(image->format->rt_type == V3D_OUTPUT_IMAGE_FORMAT_D24S8); 334bf215546Sopenharmony_ci store.output_image_format = image->format->rt_type; 335bf215546Sopenharmony_ci } 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci store.r_b_swap = iview->swap_rb; 338bf215546Sopenharmony_ci store.channel_reverse = iview->channel_reverse; 339bf215546Sopenharmony_ci store.memory_format = slice->tiling; 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci if (slice->tiling == V3D_TILING_UIF_NO_XOR || 342bf215546Sopenharmony_ci slice->tiling == V3D_TILING_UIF_XOR) { 343bf215546Sopenharmony_ci store.height_in_ub_or_stride = 344bf215546Sopenharmony_ci slice->padded_height_of_output_image_in_uif_blocks; 345bf215546Sopenharmony_ci } else if (slice->tiling == V3D_TILING_RASTER) { 346bf215546Sopenharmony_ci store.height_in_ub_or_stride = slice->stride; 347bf215546Sopenharmony_ci } 348bf215546Sopenharmony_ci 349bf215546Sopenharmony_ci if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT) 350bf215546Sopenharmony_ci store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES; 351bf215546Sopenharmony_ci else if (is_multisample_resolve) 352bf215546Sopenharmony_ci store.decimate_mode = V3D_DECIMATE_MODE_4X; 353bf215546Sopenharmony_ci else 354bf215546Sopenharmony_ci store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0; 355bf215546Sopenharmony_ci } 356bf215546Sopenharmony_ci} 357bf215546Sopenharmony_ci 
358bf215546Sopenharmony_cistatic bool 359bf215546Sopenharmony_cicheck_needs_clear(const struct v3dv_cmd_buffer_state *state, 360bf215546Sopenharmony_ci VkImageAspectFlags aspect, 361bf215546Sopenharmony_ci uint32_t first_subpass_idx, 362bf215546Sopenharmony_ci VkAttachmentLoadOp load_op, 363bf215546Sopenharmony_ci bool do_clear_with_draw) 364bf215546Sopenharmony_ci{ 365bf215546Sopenharmony_ci /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are 366bf215546Sopenharmony_ci * testing does not exist in the image. 367bf215546Sopenharmony_ci */ 368bf215546Sopenharmony_ci if (!aspect) 369bf215546Sopenharmony_ci return false; 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci /* If the aspect needs to be cleared with a draw call then we won't emit 372bf215546Sopenharmony_ci * the clear here. 373bf215546Sopenharmony_ci */ 374bf215546Sopenharmony_ci if (do_clear_with_draw) 375bf215546Sopenharmony_ci return false; 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci /* If this is resuming a subpass started with another job, then attachment 378bf215546Sopenharmony_ci * load operations don't apply. 379bf215546Sopenharmony_ci */ 380bf215546Sopenharmony_ci if (state->job->is_subpass_continue) 381bf215546Sopenharmony_ci return false; 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci /* If the render area is not aligned to tile boudaries we can't use the 384bf215546Sopenharmony_ci * TLB for a clear. 385bf215546Sopenharmony_ci */ 386bf215546Sopenharmony_ci if (!state->tile_aligned_render_area) 387bf215546Sopenharmony_ci return false; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci /* If this job is running in a subpass other than the first subpass in 390bf215546Sopenharmony_ci * which this attachment (or view) is used then attachment load operations 391bf215546Sopenharmony_ci * don't apply. 
392bf215546Sopenharmony_ci */ 393bf215546Sopenharmony_ci if (state->job->first_subpass != first_subpass_idx) 394bf215546Sopenharmony_ci return false; 395bf215546Sopenharmony_ci 396bf215546Sopenharmony_ci /* The attachment load operation must be CLEAR */ 397bf215546Sopenharmony_ci return load_op == VK_ATTACHMENT_LOAD_OP_CLEAR; 398bf215546Sopenharmony_ci} 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_cistatic bool 401bf215546Sopenharmony_cicheck_needs_store(const struct v3dv_cmd_buffer_state *state, 402bf215546Sopenharmony_ci VkImageAspectFlags aspect, 403bf215546Sopenharmony_ci uint32_t last_subpass_idx, 404bf215546Sopenharmony_ci VkAttachmentStoreOp store_op) 405bf215546Sopenharmony_ci{ 406bf215546Sopenharmony_ci /* We call this with image->vk.aspects & aspect, so 0 means the aspect we are 407bf215546Sopenharmony_ci * testing does not exist in the image. 408bf215546Sopenharmony_ci */ 409bf215546Sopenharmony_ci if (!aspect) 410bf215546Sopenharmony_ci return false; 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci /* Attachment (or view) store operations only apply on the last subpass 413bf215546Sopenharmony_ci * where the attachment (or view) is used, in other subpasses we always 414bf215546Sopenharmony_ci * need to store. 415bf215546Sopenharmony_ci */ 416bf215546Sopenharmony_ci if (state->subpass_idx < last_subpass_idx) 417bf215546Sopenharmony_ci return true; 418bf215546Sopenharmony_ci 419bf215546Sopenharmony_ci /* Attachment store operations only apply on the last job we emit on the the 420bf215546Sopenharmony_ci * last subpass where the attachment is used, otherwise we always need to 421bf215546Sopenharmony_ci * store. 
422bf215546Sopenharmony_ci */ 423bf215546Sopenharmony_ci if (!state->job->is_subpass_finish) 424bf215546Sopenharmony_ci return true; 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_ci /* The attachment store operation must be STORE */ 427bf215546Sopenharmony_ci return store_op == VK_ATTACHMENT_STORE_OP_STORE; 428bf215546Sopenharmony_ci} 429bf215546Sopenharmony_ci 430bf215546Sopenharmony_cistatic void 431bf215546Sopenharmony_cicmd_buffer_render_pass_emit_stores(struct v3dv_cmd_buffer *cmd_buffer, 432bf215546Sopenharmony_ci struct v3dv_cl *cl, 433bf215546Sopenharmony_ci uint32_t layer) 434bf215546Sopenharmony_ci{ 435bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 436bf215546Sopenharmony_ci struct v3dv_render_pass *pass = state->pass; 437bf215546Sopenharmony_ci const struct v3dv_subpass *subpass = 438bf215546Sopenharmony_ci &pass->subpasses[state->subpass_idx]; 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_ci bool has_stores = false; 441bf215546Sopenharmony_ci bool use_global_zs_clear = false; 442bf215546Sopenharmony_ci bool use_global_rt_clear = false; 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci assert(!pass->multiview_enabled || layer < MAX_MULTIVIEW_VIEW_COUNT); 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci /* FIXME: separate stencil */ 447bf215546Sopenharmony_ci uint32_t ds_attachment_idx = subpass->ds_attachment.attachment; 448bf215546Sopenharmony_ci if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) { 449bf215546Sopenharmony_ci const struct v3dv_render_pass_attachment *ds_attachment = 450bf215546Sopenharmony_ci &state->pass->attachments[ds_attachment_idx]; 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci assert(state->job->first_subpass >= ds_attachment->first_subpass); 453bf215546Sopenharmony_ci assert(state->subpass_idx >= ds_attachment->first_subpass); 454bf215546Sopenharmony_ci assert(state->subpass_idx <= ds_attachment->last_subpass); 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci /* From 
the Vulkan spec, VkImageSubresourceRange: 457bf215546Sopenharmony_ci * 458bf215546Sopenharmony_ci * "When an image view of a depth/stencil image is used as a 459bf215546Sopenharmony_ci * depth/stencil framebuffer attachment, the aspectMask is ignored 460bf215546Sopenharmony_ci * and both depth and stencil image subresources are used." 461bf215546Sopenharmony_ci * 462bf215546Sopenharmony_ci * So we ignore the aspects from the subresource range of the image 463bf215546Sopenharmony_ci * view for the depth/stencil attachment, but we still need to restrict 464bf215546Sopenharmony_ci * the to aspects compatible with the render pass and the image. 465bf215546Sopenharmony_ci */ 466bf215546Sopenharmony_ci const VkImageAspectFlags aspects = 467bf215546Sopenharmony_ci vk_format_aspects(ds_attachment->desc.format); 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci /* Only clear once on the first subpass that uses the attachment */ 470bf215546Sopenharmony_ci uint32_t ds_first_subpass = !state->pass->multiview_enabled ? 
471bf215546Sopenharmony_ci ds_attachment->first_subpass : 472bf215546Sopenharmony_ci ds_attachment->views[layer].first_subpass; 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci bool needs_depth_clear = 475bf215546Sopenharmony_ci check_needs_clear(state, 476bf215546Sopenharmony_ci aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 477bf215546Sopenharmony_ci ds_first_subpass, 478bf215546Sopenharmony_ci ds_attachment->desc.loadOp, 479bf215546Sopenharmony_ci subpass->do_depth_clear_with_draw); 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci bool needs_stencil_clear = 482bf215546Sopenharmony_ci check_needs_clear(state, 483bf215546Sopenharmony_ci aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 484bf215546Sopenharmony_ci ds_first_subpass, 485bf215546Sopenharmony_ci ds_attachment->desc.stencilLoadOp, 486bf215546Sopenharmony_ci subpass->do_stencil_clear_with_draw); 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci /* Skip the last store if it is not required */ 489bf215546Sopenharmony_ci uint32_t ds_last_subpass = !pass->multiview_enabled ? 
490bf215546Sopenharmony_ci ds_attachment->last_subpass : 491bf215546Sopenharmony_ci ds_attachment->views[layer].last_subpass; 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci bool needs_depth_store = 494bf215546Sopenharmony_ci check_needs_store(state, 495bf215546Sopenharmony_ci aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 496bf215546Sopenharmony_ci ds_last_subpass, 497bf215546Sopenharmony_ci ds_attachment->desc.storeOp); 498bf215546Sopenharmony_ci 499bf215546Sopenharmony_ci bool needs_stencil_store = 500bf215546Sopenharmony_ci check_needs_store(state, 501bf215546Sopenharmony_ci aspects & VK_IMAGE_ASPECT_STENCIL_BIT, 502bf215546Sopenharmony_ci ds_last_subpass, 503bf215546Sopenharmony_ci ds_attachment->desc.stencilStoreOp); 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci /* If we have a resolve, handle it before storing the tile */ 506bf215546Sopenharmony_ci const struct v3dv_cmd_buffer_attachment_state *ds_att_state = 507bf215546Sopenharmony_ci &state->attachments[ds_attachment_idx]; 508bf215546Sopenharmony_ci if (ds_att_state->use_tlb_resolve) { 509bf215546Sopenharmony_ci assert(ds_att_state->has_resolve); 510bf215546Sopenharmony_ci assert(subpass->resolve_depth || subpass->resolve_stencil); 511bf215546Sopenharmony_ci const uint32_t resolve_attachment_idx = 512bf215546Sopenharmony_ci subpass->ds_resolve_attachment.attachment; 513bf215546Sopenharmony_ci assert(resolve_attachment_idx != VK_ATTACHMENT_UNUSED); 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_ci const uint32_t zs_buffer = 516bf215546Sopenharmony_ci v3dv_zs_buffer(subpass->resolve_depth, subpass->resolve_stencil); 517bf215546Sopenharmony_ci cmd_buffer_render_pass_emit_store(cmd_buffer, cl, 518bf215546Sopenharmony_ci resolve_attachment_idx, layer, 519bf215546Sopenharmony_ci zs_buffer, 520bf215546Sopenharmony_ci false, false); 521bf215546Sopenharmony_ci has_stores = true; 522bf215546Sopenharmony_ci } else if (ds_att_state->has_resolve) { 523bf215546Sopenharmony_ci /* If we can't use the TLB to 
implement the resolve we will need to 524bf215546Sopenharmony_ci * store the attachment so we can implement it later using a blit. 525bf215546Sopenharmony_ci */ 526bf215546Sopenharmony_ci needs_depth_store = subpass->resolve_depth; 527bf215546Sopenharmony_ci needs_stencil_store = subpass->resolve_stencil; 528bf215546Sopenharmony_ci } 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_ci /* GFXH-1689: The per-buffer store command's clear buffer bit is broken 531bf215546Sopenharmony_ci * for depth/stencil. 532bf215546Sopenharmony_ci * 533bf215546Sopenharmony_ci * There used to be some confusion regarding the Clear Tile Buffers 534bf215546Sopenharmony_ci * Z/S bit also being broken, but we confirmed with Broadcom that this 535bf215546Sopenharmony_ci * is not the case, it was just that some other hardware bugs (that we 536bf215546Sopenharmony_ci * need to work around, such as GFXH-1461) could cause this bit to behave 537bf215546Sopenharmony_ci * incorrectly. 538bf215546Sopenharmony_ci * 539bf215546Sopenharmony_ci * There used to be another issue where the RTs bit in the Clear Tile 540bf215546Sopenharmony_ci * Buffers packet also cleared Z/S, but Broadcom confirmed this is 541bf215546Sopenharmony_ci * fixed since V3D 4.1. 542bf215546Sopenharmony_ci * 543bf215546Sopenharmony_ci * So if we have to emit a clear of depth or stencil we don't use 544bf215546Sopenharmony_ci * the per-buffer store clear bit, even if we need to store the buffers, 545bf215546Sopenharmony_ci * instead we always have to use the Clear Tile Buffers Z/S bit. 546bf215546Sopenharmony_ci * If we have configured the job to do early Z/S clearing, then we 547bf215546Sopenharmony_ci * don't want to emit any Clear Tile Buffers command at all here. 548bf215546Sopenharmony_ci * 549bf215546Sopenharmony_ci * Note that GFXH-1689 is not reproduced in the simulator, where 550bf215546Sopenharmony_ci * using the clear buffer bit in depth/stencil stores works fine. 
551bf215546Sopenharmony_ci */ 552bf215546Sopenharmony_ci use_global_zs_clear = !state->job->early_zs_clear && 553bf215546Sopenharmony_ci (needs_depth_clear || needs_stencil_clear); 554bf215546Sopenharmony_ci if (needs_depth_store || needs_stencil_store) { 555bf215546Sopenharmony_ci const uint32_t zs_buffer = 556bf215546Sopenharmony_ci v3dv_zs_buffer(needs_depth_store, needs_stencil_store); 557bf215546Sopenharmony_ci cmd_buffer_render_pass_emit_store(cmd_buffer, cl, 558bf215546Sopenharmony_ci ds_attachment_idx, layer, 559bf215546Sopenharmony_ci zs_buffer, false, false); 560bf215546Sopenharmony_ci has_stores = true; 561bf215546Sopenharmony_ci } 562bf215546Sopenharmony_ci } 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; i++) { 565bf215546Sopenharmony_ci uint32_t attachment_idx = subpass->color_attachments[i].attachment; 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci if (attachment_idx == VK_ATTACHMENT_UNUSED) 568bf215546Sopenharmony_ci continue; 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci const struct v3dv_render_pass_attachment *attachment = 571bf215546Sopenharmony_ci &state->pass->attachments[attachment_idx]; 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci assert(state->job->first_subpass >= attachment->first_subpass); 574bf215546Sopenharmony_ci assert(state->subpass_idx >= attachment->first_subpass); 575bf215546Sopenharmony_ci assert(state->subpass_idx <= attachment->last_subpass); 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci /* Only clear once on the first subpass that uses the attachment */ 578bf215546Sopenharmony_ci uint32_t first_subpass = !pass->multiview_enabled ? 
579bf215546Sopenharmony_ci attachment->first_subpass : 580bf215546Sopenharmony_ci attachment->views[layer].first_subpass; 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci bool needs_clear = 583bf215546Sopenharmony_ci check_needs_clear(state, 584bf215546Sopenharmony_ci VK_IMAGE_ASPECT_COLOR_BIT, 585bf215546Sopenharmony_ci first_subpass, 586bf215546Sopenharmony_ci attachment->desc.loadOp, 587bf215546Sopenharmony_ci false); 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci /* Skip the last store if it is not required */ 590bf215546Sopenharmony_ci uint32_t last_subpass = !pass->multiview_enabled ? 591bf215546Sopenharmony_ci attachment->last_subpass : 592bf215546Sopenharmony_ci attachment->views[layer].last_subpass; 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci bool needs_store = 595bf215546Sopenharmony_ci check_needs_store(state, 596bf215546Sopenharmony_ci VK_IMAGE_ASPECT_COLOR_BIT, 597bf215546Sopenharmony_ci last_subpass, 598bf215546Sopenharmony_ci attachment->desc.storeOp); 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci /* If we need to resolve this attachment emit that store first. Notice 601bf215546Sopenharmony_ci * that we must not request a tile buffer clear here in that case, since 602bf215546Sopenharmony_ci * that would clear the tile buffer before we get to emit the actual 603bf215546Sopenharmony_ci * color attachment store below, since the clear happens after the 604bf215546Sopenharmony_ci * store is completed. 605bf215546Sopenharmony_ci * 606bf215546Sopenharmony_ci * If the attachment doesn't support TLB resolves (or the render area 607bf215546Sopenharmony_ci * is not aligned to tile boundaries) then we will have to fallback to 608bf215546Sopenharmony_ci * doing the resolve in a shader separately after this job, so we will 609bf215546Sopenharmony_ci * need to store the multisampled attachment even if that wasn't 610bf215546Sopenharmony_ci * requested by the client. 
611bf215546Sopenharmony_ci */ 612bf215546Sopenharmony_ci const struct v3dv_cmd_buffer_attachment_state *att_state = 613bf215546Sopenharmony_ci &state->attachments[attachment_idx]; 614bf215546Sopenharmony_ci if (att_state->use_tlb_resolve) { 615bf215546Sopenharmony_ci assert(att_state->has_resolve); 616bf215546Sopenharmony_ci const uint32_t resolve_attachment_idx = 617bf215546Sopenharmony_ci subpass->resolve_attachments[i].attachment; 618bf215546Sopenharmony_ci cmd_buffer_render_pass_emit_store(cmd_buffer, cl, 619bf215546Sopenharmony_ci resolve_attachment_idx, layer, 620bf215546Sopenharmony_ci RENDER_TARGET_0 + i, 621bf215546Sopenharmony_ci false, true); 622bf215546Sopenharmony_ci has_stores = true; 623bf215546Sopenharmony_ci } else if (att_state->has_resolve) { 624bf215546Sopenharmony_ci needs_store = true; 625bf215546Sopenharmony_ci } 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci /* Emit the color attachment store if needed */ 628bf215546Sopenharmony_ci if (needs_store) { 629bf215546Sopenharmony_ci cmd_buffer_render_pass_emit_store(cmd_buffer, cl, 630bf215546Sopenharmony_ci attachment_idx, layer, 631bf215546Sopenharmony_ci RENDER_TARGET_0 + i, 632bf215546Sopenharmony_ci needs_clear && !use_global_rt_clear, 633bf215546Sopenharmony_ci false); 634bf215546Sopenharmony_ci has_stores = true; 635bf215546Sopenharmony_ci } else if (needs_clear) { 636bf215546Sopenharmony_ci use_global_rt_clear = true; 637bf215546Sopenharmony_ci } 638bf215546Sopenharmony_ci } 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci /* We always need to emit at least one dummy store */ 641bf215546Sopenharmony_ci if (!has_stores) { 642bf215546Sopenharmony_ci cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) { 643bf215546Sopenharmony_ci store.buffer_to_store = NONE; 644bf215546Sopenharmony_ci } 645bf215546Sopenharmony_ci } 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci /* If we have any depth/stencil clears we can't use the per-buffer clear 648bf215546Sopenharmony_ci * bit and instead 
we have to emit a single clear of all tile buffers.
    */
   if (use_global_zs_clear || use_global_rt_clear) {
      cl_emit(cl, CLEAR_TILE_BUFFERS, clear) {
         clear.clear_z_stencil_buffer = use_global_zs_clear;
         clear.clear_all_render_targets = use_global_rt_clear;
      }
   }
}

/* Builds the generic per-tile command list for the given layer in the job's
 * indirect CL and records its start/end addresses in the job's RCL.
 *
 * The list is emitted in this order: implicit tile coordinates, attachment
 * loads, primitive list format and instance id setup, the branch to the
 * implicit tile list, attachment stores, the end-of-tile marker and the
 * return from the sub-list.
 */
static void
cmd_buffer_render_pass_emit_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer,
                                         uint32_t layer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   /* Emit the generic list in our indirect state -- the rcl will just
    * have pointers into it.
    */
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   /* NOTE(review): presumably bails out of this function if reserving the
    * space above failed -- confirm against the macro definition.
    */
   v3dv_return_if_oom(cmd_buffer, NULL);

   /* Remember where the list begins so the RCL can point at it below. */
   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cmd_buffer_render_pass_emit_loads(cmd_buffer, cl, layer);

   /* The binner starts out writing tiles assuming that the initial mode
    * is triangles, so make sure that's the case.
    */
   cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
      fmt.primitive_type = LIST_TRIANGLES;
   }

   /* PTB assumes that value to be 0, but hw will not set it. */
   cl_emit(cl, SET_INSTANCEID, set) {
      set.instance_id = 0;
   }

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   cmd_buffer_render_pass_emit_stores(cmd_buffer, cl, layer);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   /* Tell the RCL the address range of the generic list we just wrote. */
   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

/* Emits the RCL commands for one framebuffer layer: sets the tile list base
 * address for the layer, emits the generic per-tile list, and then emits one
 * SUPERTILE_COORDINATES packet for every supertile between the first and the
 * last supertile touched by the render area (inclusive, in both axes).
 */
static void
cmd_buffer_emit_render_pass_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
                                      uint32_t layer)
{
   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;

   struct v3dv_job *job = cmd_buffer->state.job;
   struct v3dv_cl *rcl = &job->rcl;

   /* If doing multicore binning, we would need to initialize each
    * core's tile list here.
    */
   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
   /* Each layer's tile list starts 64 bytes per tile after the previous one.
    * NOTE(review): 64 presumably matches the 64B initial tile allocation
    * block size used elsewhere in this file -- confirm.
    */
   const uint32_t tile_alloc_offset =
      64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
   }

   cmd_buffer_render_pass_emit_per_tile_rcl(cmd_buffer, layer);

   uint32_t supertile_w_in_pixels =
      tiling->tile_width * tiling->supertile_width;
   uint32_t supertile_h_in_pixels =
      tiling->tile_height * tiling->supertile_height;
   const uint32_t min_x_supertile =
      state->render_area.offset.x / supertile_w_in_pixels;
   const uint32_t min_y_supertile =
      state->render_area.offset.y / supertile_h_in_pixels;

   /* Last pixel covered by the render area. The extent checks keep
    * "extent - 1" from being computed for a zero-sized area.
    */
   uint32_t max_render_x = state->render_area.offset.x;
   if (state->render_area.extent.width > 0)
      max_render_x += state->render_area.extent.width - 1;
   uint32_t max_render_y = state->render_area.offset.y;
   if (state->render_area.extent.height > 0)
      max_render_y += state->render_area.extent.height - 1;
   const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels;
   const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;

   /* Row-major walk over the supertiles overlapping the render area. */
   for (int y = min_y_supertile; y <= max_y_supertile; y++) {
      for (int x = min_x_supertile; x <= max_x_supertile; x++) {
         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
            coords.column_number_in_supertiles = x;
            coords.row_number_in_supertiles = y;
         }
      }
   }
}

/* Translates the job's early-Z state into the two early-Z fields of the
 * common rendering mode config.
 *
 * *early_z_disable is always written. *early_z_test_and_update_direction is
 * only written when early-Z ends up enabled; on the disabled paths it is left
 * untouched.
 */
static void
set_rcl_early_z_config(struct v3dv_job *job,
                       bool *early_z_disable,
                       uint32_t *early_z_test_and_update_direction)
{
   /* Disable if none of the draw calls in this job enabled EZ */
   if (!job->has_ez_draws) {
      *early_z_disable = true;
      return;
   }

   switch (job->first_ez_state) {
   /* An undecided state is handled the same as LT/LE. */
   case V3D_EZ_UNDECIDED:
   case V3D_EZ_LT_LE:
      *early_z_disable = false;
      *early_z_test_and_update_direction = EARLY_Z_DIRECTION_LT_LE;
      break;
   case V3D_EZ_GT_GE:
      *early_z_disable = false;
      *early_z_test_and_update_direction = EARLY_Z_DIRECTION_GT_GE;
      break;
   case V3D_EZ_DISABLED:
      *early_z_disable = true;
      break;
   }
}

/* Emits the render control list (RCL) for the current job of the command
 * buffer's render pass.
 *
 * In order, this emits: the common tile rendering mode config (including
 * the early-Z and early Z/S clear decisions), per-render-target clear
 * colors, the render target config, the Z/S clear values, the tile list
 * initial block size, the supertile config, two dummy tiles (initial tile
 * buffer clear and GFXH-1742 workaround), a VCD cache flush, the per-layer
 * lists, and END_OF_RENDERING.
 *
 * Returns without emitting anything when no framebuffer is bound yet. As a
 * side effect, job->early_zs_clear is updated.
 */
void
v3dX(cmd_buffer_emit_render_pass_rcl)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   const struct v3dv_framebuffer *framebuffer = state->framebuffer;

   /* We can't emit the RCL until we have a framebuffer, which we may not have
    * if we are recording a secondary command buffer. In that case, we will
    * have to wait until vkCmdExecuteCommands is called from a primary command
    * buffer.
    */
   if (!framebuffer) {
      assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
      return;
   }

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   const uint32_t fb_layers = job->frame_tiling.layers;

   /* Reserve 200 bytes plus room for 256 SUPERTILE_COORDINATES packets per
    * layer (at least one layer).
    */
   v3dv_cl_ensure_space_with_branch(&job->rcl, 200 +
                                    MAX2(fb_layers, 1) * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));
   v3dv_return_if_oom(cmd_buffer, NULL);

   assert(state->subpass_idx < state->pass->subpass_count);
   const struct v3dv_render_pass *pass = state->pass;
   const struct v3dv_subpass *subpass = &pass->subpasses[state->subpass_idx];
   struct v3dv_cl *rcl = &job->rcl;

   /* Common config must be the first TILE_RENDERING_MODE_CFG and
    * Z_STENCIL_CLEAR_VALUES must be last. The ones in between are optional
    * updates to the previous HW state.
    */
   bool do_early_zs_clear = false;
   const uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
   /* MSAA and double-buffer mode are never enabled together. */
   assert(!tiling->msaa || !tiling->double_buffer);
   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.image_width_pixels = framebuffer->width;
      config.image_height_pixels = framebuffer->height;
      /* At least one render target is always reported, even when the
       * subpass has no color attachments.
       */
      config.number_of_render_targets = MAX2(subpass->color_count, 1);
      config.multisample_mode_4x = tiling->msaa;
      config.double_buffer_in_non_ms_mode = tiling->double_buffer;
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;

      if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
         const struct v3dv_image_view *iview =
            state->attachments[ds_attachment_idx].image_view;
         config.internal_depth_type = iview->internal_type;

         set_rcl_early_z_config(job,
                                &config.early_z_disable,
                                &config.early_z_test_and_update_direction);

         /* Early-Z/S clear can be enabled if the job is clearing and not
          * storing (or loading) depth. If a stencil aspect is also present
          * we have the same requirements for it, however, in this case we
          * can accept stencil loadOp DONT_CARE as well, so instead of
          * checking that stencil is cleared we check that it is not loaded.
          *
          * Early-Z/S clearing is independent of Early Z/S testing, so it is
          * possible to enable one but not the other so long as their
          * respective requirements are met.
          */
         struct v3dv_render_pass_attachment *ds_attachment =
            &pass->attachments[ds_attachment_idx];

         const VkImageAspectFlags ds_aspects =
            vk_format_aspects(ds_attachment->desc.format);

         bool needs_depth_clear =
            check_needs_clear(state,
                              ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                              ds_attachment->first_subpass,
                              ds_attachment->desc.loadOp,
                              subpass->do_depth_clear_with_draw);

         /* A depth resolve in this subpass also counts as needing a store. */
         bool needs_depth_store =
            check_needs_store(state,
                              ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                              ds_attachment->last_subpass,
                              ds_attachment->desc.storeOp) ||
                              subpass->resolve_depth;

         do_early_zs_clear = needs_depth_clear && !needs_depth_store;
         if (do_early_zs_clear &&
             vk_format_has_stencil(ds_attachment->desc.format)) {
            bool needs_stencil_load =
               check_needs_load(state,
                                ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                ds_attachment->first_subpass,
                                ds_attachment->desc.stencilLoadOp);

            /* Same as for depth: a stencil resolve counts as a store. */
            bool needs_stencil_store =
               check_needs_store(state,
                                 ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                 ds_attachment->last_subpass,
                                 ds_attachment->desc.stencilStoreOp) ||
                                 subpass->resolve_stencil;

            do_early_zs_clear = !needs_stencil_load && !needs_stencil_store;
         }

         config.early_depth_stencil_clear = do_early_zs_clear;
      } else {
         /* No depth/stencil attachment: early-Z is disabled. */
         config.early_z_disable = true;
      }
   }

   /* If we enabled early Z/S clear, then we can't emit any "Clear Tile Buffers"
    * commands with the Z/S bit set, so keep track of whether we enabled this
    * in the job so we can skip these later.
    */
   job->early_zs_clear = do_early_zs_clear;

   /* Emit the clear color packets for every used color attachment. */
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      uint32_t attachment_idx = subpass->color_attachments[i].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      struct v3dv_image_view *iview =
         state->attachments[attachment_idx].image_view;

      const struct v3dv_image *image = (struct v3dv_image *) iview->vk.image;
      const struct v3d_resource_slice *slice =
         &image->slices[iview->vk.base_mip_level];

      const uint32_t *clear_color =
         &state->attachments[attachment_idx].clear_value.color[0];

      /* For UIF-tiled slices, pass the slice's padded height explicitly
       * when it exceeds the height implied by the framebuffer by 15 or more
       * UIF blocks; otherwise clear_pad stays 0.
       */
      uint32_t clear_pad = 0;
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         int uif_block_height = v3d_utile_height(image->cpp) * 2;

         uint32_t implicit_padded_height =
            align(framebuffer->height, uif_block_height) / uif_block_height;

         if (slice->padded_height_of_output_image_in_uif_blocks -
             implicit_padded_height >= 15) {
            clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
         }
      }

      /* The four 32-bit clear color words are split across up to three
       * packets: PART1 carries bits 0-55 (32 + 24), PART2 bits 56-111
       * (32 + 24) and PART3 bits 112-127 (16).
       */
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
         clear.clear_color_low_32_bits = clear_color[0];
         clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
         clear.render_target_number = i;
      };

      if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
            clear.clear_color_mid_low_32_bits =
               ((clear_color[1] >> 24) | (clear_color[2] << 8));
            clear.clear_color_mid_high_24_bits =
               ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
            clear.render_target_number = i;
         };
      }

      /* PART3 is also needed whenever an explicit UIF padded height has to
       * be programmed, since that field lives in this packet.
       */
      if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
            clear.uif_padded_height_in_uif_blocks = clear_pad;
            clear.clear_color_high_16_bits = clear_color[3] >> 16;
            clear.render_target_number = i;
         };
      }
   }

   /* Fill in internal bpp, internal type and clamp for render targets 0-3. */
   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      v3dX(cmd_buffer_render_pass_setup_render_target)
         (cmd_buffer, 0, &rt.render_target_0_internal_bpp,
          &rt.render_target_0_internal_type, &rt.render_target_0_clamp);
      v3dX(cmd_buffer_render_pass_setup_render_target)
         (cmd_buffer, 1, &rt.render_target_1_internal_bpp,
          &rt.render_target_1_internal_type, &rt.render_target_1_clamp);
      v3dX(cmd_buffer_render_pass_setup_render_target)
         (cmd_buffer, 2, &rt.render_target_2_internal_bpp,
          &rt.render_target_2_internal_type, &rt.render_target_2_clamp);
      v3dX(cmd_buffer_render_pass_setup_render_target)
         (cmd_buffer, 3, &rt.render_target_3_internal_bpp,
          &rt.render_target_3_internal_type, &rt.render_target_3_clamp);
   }

   /* Ends rendering mode config. */
   if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
         clear.z_clear_value =
            state->attachments[ds_attachment_idx].clear_value.z;
         clear.stencil_clear_value =
            state->attachments[ds_attachment_idx].clear_value.s;
      };
   } else {
      /* The packet is still emitted without a depth/stencil attachment,
       * using fixed values (Z = 1.0, stencil = 0).
       */
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
         clear.z_clear_value = 1.0f;
         clear.stencil_clear_value = 0;
      };
   }

   /* Always set initial block size before the first branch, which needs
    * to match the value from binning mode config.
    */
   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
      config.total_frame_height_in_tiles = tiling->draw_tiles_y;

      config.supertile_width_in_tiles = tiling->supertile_width;
      config.supertile_height_in_tiles = tiling->supertile_height;

      config.total_frame_width_in_supertiles =
         tiling->frame_width_in_supertiles;
      config.total_frame_height_in_supertiles =
         tiling->frame_height_in_supertiles;
   }

   /* Emit an initial clear of the tile buffers. This is necessary
    * for any buffers that should be cleared (since clearing
    * normally happens at the *end* of the generic tile list), but
    * it's also nice to clear everything so the first tile doesn't
    * inherit any contents from some previous frame.
    *
    * Also, implement the GFXH-1742 workaround. There's a race in
    * the HW between the RCL updating the TLB's internal type/size
    * and the spawning of the QPU instances using the TLB's current
    * internal type/size. To make sure the QPUs get the right
    * state, we need 1 dummy store in between internal type/size
    * changes on V3D 3.x, and 2 dummy stores on 4.x.
    */
   for (int i = 0; i < 2; i++) {
      cl_emit(rcl, TILE_COORDINATES, coords);
      cl_emit(rcl, END_OF_LOADS, end);
      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
      /* The clear is only emitted for tile-aligned render areas; the second
       * dummy tile clears only if v3dv_do_double_initial_tile_clear() says
       * so. Z/S is left out of the clear when early Z/S clear is enabled.
       */
      if (cmd_buffer->state.tile_aligned_render_area &&
          (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
            clear.clear_z_stencil_buffer = !job->early_zs_clear;
            clear.clear_all_render_targets = true;
         }
      }
      cl_emit(rcl, END_OF_TILE_MARKER, end);
   }

   cl_emit(rcl, FLUSH_VCD_CACHE, flush);

   /* Emit every layer when the subpass has no view mask; otherwise only the
    * layers whose bit is set in the view mask.
    */
   for (int layer = 0; layer < MAX2(1, fb_layers); layer++) {
      if (subpass->view_mask == 0 || (subpass->view_mask & (1u << layer)))
         cmd_buffer_emit_render_pass_layer_rcl(cmd_buffer, layer);
   }

   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Emits the viewport state (clipper XY scaling, Z scale/offset, Z min/max
 * clipping planes and viewport offset) from the dynamic state into the job's
 * BCL, then clears the viewport dirty flag.
 */
void
v3dX(cmd_buffer_emit_viewport)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
   /* FIXME: right now we only support one viewport. viewports[0] would work
    * now, would need to change if we allow multiple viewports
    */
   float *vptranslate = dynamic->viewport.translate[0];
   float *vpscale = dynamic->viewport.scale[0];

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   /* Reserve space for all four packets up front. */
   const uint32_t required_cl_size =
      cl_packet_length(CLIPPER_XY_SCALING) +
      cl_packet_length(CLIPPER_Z_SCALE_AND_OFFSET) +
      cl_packet_length(CLIPPER_Z_MIN_MAX_CLIPPING_PLANES) +
      cl_packet_length(VIEWPORT_OFFSET);
   v3dv_cl_ensure_space_with_branch(&job->bcl, required_cl_size);
   v3dv_return_if_oom(cmd_buffer, NULL);

   /* The packet takes half extents in 1/256th of a pixel, hence the
    * scaling by 256.
    */
   cl_emit(&job->bcl, CLIPPER_XY_SCALING, clip) {
      clip.viewport_half_width_in_1_256th_of_pixel = vpscale[0] * 256.0f;
      clip.viewport_half_height_in_1_256th_of_pixel = vpscale[1] * 256.0f;
   }

   cl_emit(&job->bcl, CLIPPER_Z_SCALE_AND_OFFSET, clip) {
      clip.viewport_z_offset_zc_to_zs = vptranslate[2];
      clip.viewport_z_scale_zc_to_zs = vpscale[2];
   }
   cl_emit(&job->bcl, CLIPPER_Z_MIN_MAX_CLIPPING_PLANES, clip) {
      /* Vulkan's Z NDC is [0..1], unlike OpenGL which is [-1, 1] */
      float z1 = vptranslate[2];
      float z2 = vptranslate[2] + vpscale[2];
      /* MIN2/MAX2 keep the planes ordered even if the Z scale is negative. */
      clip.minimum_zw = MIN2(z1, z2);
      clip.maximum_zw = MAX2(z1, z2);
   }

   cl_emit(&job->bcl, VIEWPORT_OFFSET, vp) {
      vp.viewport_centre_x_coordinate = vptranslate[0];
      vp.viewport_centre_y_coordinate = vptranslate[1];
   }

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_VIEWPORT;
}

/* Emits the stencil configuration packets (up to two: index 0 uses the
 * front-face dynamic values, index 1 the back-face ones) into the job's BCL.
 *
 * Each packet starts from the pipeline's prepacked config; when any stencil
 * state is dynamic, the dynamic compare mask, write mask and/or reference
 * override the prepacked values. The stencil dirty flags are cleared only if
 * at least one packet was emitted.
 */
void
v3dX(cmd_buffer_emit_stencil)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   struct v3dv_dynamic_state *dynamic_state = &cmd_buffer->state.dynamic;

   const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
                                           V3DV_DYNAMIC_STENCIL_WRITE_MASK |
                                           V3DV_DYNAMIC_STENCIL_REFERENCE;

   v3dv_cl_ensure_space_with_branch(&job->bcl,
                                    2 * cl_packet_length(STENCIL_CFG));
   v3dv_return_if_oom(cmd_buffer, NULL);

   bool emitted_stencil = false;
   for (uint32_t i = 0; i < 2; i++) {
      if (pipeline->emit_stencil_cfg[i]) {
         if (dynamic_state->mask & dynamic_stencil_states) {
            /* Patch only the fields that are flagged as dynamic on top of
             * the pipeline's prepacked packet.
             */
            cl_emit_with_prepacked(&job->bcl, STENCIL_CFG,
                                   pipeline->stencil_cfg[i], config) {
               if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK) {
                  config.stencil_test_mask =
                     i == 0 ? dynamic_state->stencil_compare_mask.front :
                              dynamic_state->stencil_compare_mask.back;
               }
               if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK) {
                  config.stencil_write_mask =
                     i == 0 ? dynamic_state->stencil_write_mask.front :
                              dynamic_state->stencil_write_mask.back;
               }
               if (dynamic_state->mask & V3DV_DYNAMIC_STENCIL_REFERENCE) {
                  config.stencil_ref_value =
                     i == 0 ? dynamic_state->stencil_reference.front :
                              dynamic_state->stencil_reference.back;
               }
            }
         } else {
            /* Nothing dynamic: emit the pipeline's packet as is. */
            cl_emit_prepacked(&job->bcl, &pipeline->stencil_cfg[i]);
         }

         emitted_stencil = true;
      }
   }

   if (emitted_stencil) {
      const uint32_t dynamic_stencil_dirty_flags =
         V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK |
         V3DV_CMD_DIRTY_STENCIL_WRITE_MASK |
         V3DV_CMD_DIRTY_STENCIL_REFERENCE;
      cmd_buffer->state.dirty &= ~dynamic_stencil_dirty_flags;
   }
}

/* Emits the DEPTH_OFFSET packet from the dynamic depth bias state into the
 * job's BCL and clears the depth bias dirty flag.
 *
 * Does nothing (and leaves the dirty flag untouched) when the bound pipeline
 * has depth bias disabled.
 */
void
v3dX(cmd_buffer_emit_depth_bias)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   assert(pipeline);

   if (!pipeline->depth_bias.enabled)
      return;

   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(DEPTH_OFFSET));
   v3dv_return_if_oom(cmd_buffer, NULL);

   struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic;
   cl_emit(&job->bcl, DEPTH_OFFSET, bias) {
      bias.depth_offset_factor = dynamic->depth_bias.slope_factor;
      bias.depth_offset_units = dynamic->depth_bias.constant_factor;
      /* The constant factor is scaled by 256 for pipelines flagged is_z16. */
      if (pipeline->depth_bias.is_z16)
         bias.depth_offset_units *= 256.0f;
      bias.limit = dynamic->depth_bias.depth_bias_clamp;
   }

   cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_DEPTH_BIAS;
}

void
v3dX(cmd_buffer_emit_line_width)(struct v3dv_cmd_buffer *cmd_buffer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(LINE_WIDTH));
   v3dv_return_if_oom(cmd_buffer, NULL);

   cl_emit(&job->bcl, LINE_WIDTH, line) {
      line.line_width = cmd_buffer->state.dynamic.line_width;
   }

cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_LINE_WIDTH; 1187bf215546Sopenharmony_ci} 1188bf215546Sopenharmony_ci 1189bf215546Sopenharmony_civoid 1190bf215546Sopenharmony_civ3dX(cmd_buffer_emit_sample_state)(struct v3dv_cmd_buffer *cmd_buffer) 1191bf215546Sopenharmony_ci{ 1192bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 1193bf215546Sopenharmony_ci assert(pipeline); 1194bf215546Sopenharmony_ci 1195bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 1196bf215546Sopenharmony_ci assert(job); 1197bf215546Sopenharmony_ci 1198bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(SAMPLE_STATE)); 1199bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 1200bf215546Sopenharmony_ci 1201bf215546Sopenharmony_ci cl_emit(&job->bcl, SAMPLE_STATE, state) { 1202bf215546Sopenharmony_ci state.coverage = 1.0f; 1203bf215546Sopenharmony_ci state.mask = pipeline->sample_mask; 1204bf215546Sopenharmony_ci } 1205bf215546Sopenharmony_ci} 1206bf215546Sopenharmony_ci 1207bf215546Sopenharmony_civoid 1208bf215546Sopenharmony_civ3dX(cmd_buffer_emit_blend)(struct v3dv_cmd_buffer *cmd_buffer) 1209bf215546Sopenharmony_ci{ 1210bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 1211bf215546Sopenharmony_ci assert(job); 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 1214bf215546Sopenharmony_ci assert(pipeline); 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_ci const uint32_t blend_packets_size = 1217bf215546Sopenharmony_ci cl_packet_length(BLEND_ENABLES) + 1218bf215546Sopenharmony_ci cl_packet_length(BLEND_CONSTANT_COLOR) + 1219bf215546Sopenharmony_ci cl_packet_length(BLEND_CFG) * V3D_MAX_DRAW_BUFFERS; 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, blend_packets_size); 1222bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 
1223bf215546Sopenharmony_ci 1224bf215546Sopenharmony_ci if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) { 1225bf215546Sopenharmony_ci if (pipeline->blend.enables) { 1226bf215546Sopenharmony_ci cl_emit(&job->bcl, BLEND_ENABLES, enables) { 1227bf215546Sopenharmony_ci enables.mask = pipeline->blend.enables; 1228bf215546Sopenharmony_ci } 1229bf215546Sopenharmony_ci } 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_ci for (uint32_t i = 0; i < V3D_MAX_DRAW_BUFFERS; i++) { 1232bf215546Sopenharmony_ci if (pipeline->blend.enables & (1 << i)) 1233bf215546Sopenharmony_ci cl_emit_prepacked(&job->bcl, &pipeline->blend.cfg[i]); 1234bf215546Sopenharmony_ci } 1235bf215546Sopenharmony_ci } 1236bf215546Sopenharmony_ci 1237bf215546Sopenharmony_ci if (pipeline->blend.needs_color_constants && 1238bf215546Sopenharmony_ci cmd_buffer->state.dirty & V3DV_CMD_DIRTY_BLEND_CONSTANTS) { 1239bf215546Sopenharmony_ci struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; 1240bf215546Sopenharmony_ci cl_emit(&job->bcl, BLEND_CONSTANT_COLOR, color) { 1241bf215546Sopenharmony_ci color.red_f16 = _mesa_float_to_half(dynamic->blend_constants[0]); 1242bf215546Sopenharmony_ci color.green_f16 = _mesa_float_to_half(dynamic->blend_constants[1]); 1243bf215546Sopenharmony_ci color.blue_f16 = _mesa_float_to_half(dynamic->blend_constants[2]); 1244bf215546Sopenharmony_ci color.alpha_f16 = _mesa_float_to_half(dynamic->blend_constants[3]); 1245bf215546Sopenharmony_ci } 1246bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_BLEND_CONSTANTS; 1247bf215546Sopenharmony_ci } 1248bf215546Sopenharmony_ci} 1249bf215546Sopenharmony_ci 1250bf215546Sopenharmony_civoid 1251bf215546Sopenharmony_civ3dX(cmd_buffer_emit_color_write_mask)(struct v3dv_cmd_buffer *cmd_buffer) 1252bf215546Sopenharmony_ci{ 1253bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 1254bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(COLOR_WRITE_MASKS)); 
1255bf215546Sopenharmony_ci 1256bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 1257bf215546Sopenharmony_ci struct v3dv_dynamic_state *dynamic = &cmd_buffer->state.dynamic; 1258bf215546Sopenharmony_ci cl_emit(&job->bcl, COLOR_WRITE_MASKS, mask) { 1259bf215546Sopenharmony_ci mask.mask = (~dynamic->color_write_enable | 1260bf215546Sopenharmony_ci pipeline->blend.color_write_masks) & 0xffff; 1261bf215546Sopenharmony_ci } 1262bf215546Sopenharmony_ci 1263bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_COLOR_WRITE_ENABLE; 1264bf215546Sopenharmony_ci} 1265bf215546Sopenharmony_ci 1266bf215546Sopenharmony_cistatic void 1267bf215546Sopenharmony_ciemit_flat_shade_flags(struct v3dv_job *job, 1268bf215546Sopenharmony_ci int varying_offset, 1269bf215546Sopenharmony_ci uint32_t varyings, 1270bf215546Sopenharmony_ci enum V3DX(Varying_Flags_Action) lower, 1271bf215546Sopenharmony_ci enum V3DX(Varying_Flags_Action) higher) 1272bf215546Sopenharmony_ci{ 1273bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, 1274bf215546Sopenharmony_ci cl_packet_length(FLAT_SHADE_FLAGS)); 1275bf215546Sopenharmony_ci v3dv_return_if_oom(NULL, job); 1276bf215546Sopenharmony_ci 1277bf215546Sopenharmony_ci cl_emit(&job->bcl, FLAT_SHADE_FLAGS, flags) { 1278bf215546Sopenharmony_ci flags.varying_offset_v0 = varying_offset; 1279bf215546Sopenharmony_ci flags.flat_shade_flags_for_varyings_v024 = varyings; 1280bf215546Sopenharmony_ci flags.action_for_flat_shade_flags_of_lower_numbered_varyings = lower; 1281bf215546Sopenharmony_ci flags.action_for_flat_shade_flags_of_higher_numbered_varyings = higher; 1282bf215546Sopenharmony_ci } 1283bf215546Sopenharmony_ci} 1284bf215546Sopenharmony_ci 1285bf215546Sopenharmony_cistatic void 1286bf215546Sopenharmony_ciemit_noperspective_flags(struct v3dv_job *job, 1287bf215546Sopenharmony_ci int varying_offset, 1288bf215546Sopenharmony_ci uint32_t varyings, 1289bf215546Sopenharmony_ci enum 
V3DX(Varying_Flags_Action) lower, 1290bf215546Sopenharmony_ci enum V3DX(Varying_Flags_Action) higher) 1291bf215546Sopenharmony_ci{ 1292bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, 1293bf215546Sopenharmony_ci cl_packet_length(NON_PERSPECTIVE_FLAGS)); 1294bf215546Sopenharmony_ci v3dv_return_if_oom(NULL, job); 1295bf215546Sopenharmony_ci 1296bf215546Sopenharmony_ci cl_emit(&job->bcl, NON_PERSPECTIVE_FLAGS, flags) { 1297bf215546Sopenharmony_ci flags.varying_offset_v0 = varying_offset; 1298bf215546Sopenharmony_ci flags.non_perspective_flags_for_varyings_v024 = varyings; 1299bf215546Sopenharmony_ci flags.action_for_non_perspective_flags_of_lower_numbered_varyings = lower; 1300bf215546Sopenharmony_ci flags.action_for_non_perspective_flags_of_higher_numbered_varyings = higher; 1301bf215546Sopenharmony_ci } 1302bf215546Sopenharmony_ci} 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_cistatic void 1305bf215546Sopenharmony_ciemit_centroid_flags(struct v3dv_job *job, 1306bf215546Sopenharmony_ci int varying_offset, 1307bf215546Sopenharmony_ci uint32_t varyings, 1308bf215546Sopenharmony_ci enum V3DX(Varying_Flags_Action) lower, 1309bf215546Sopenharmony_ci enum V3DX(Varying_Flags_Action) higher) 1310bf215546Sopenharmony_ci{ 1311bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, 1312bf215546Sopenharmony_ci cl_packet_length(CENTROID_FLAGS)); 1313bf215546Sopenharmony_ci v3dv_return_if_oom(NULL, job); 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci cl_emit(&job->bcl, CENTROID_FLAGS, flags) { 1316bf215546Sopenharmony_ci flags.varying_offset_v0 = varying_offset; 1317bf215546Sopenharmony_ci flags.centroid_flags_for_varyings_v024 = varyings; 1318bf215546Sopenharmony_ci flags.action_for_centroid_flags_of_lower_numbered_varyings = lower; 1319bf215546Sopenharmony_ci flags.action_for_centroid_flags_of_higher_numbered_varyings = higher; 1320bf215546Sopenharmony_ci } 1321bf215546Sopenharmony_ci} 1322bf215546Sopenharmony_ci 
1323bf215546Sopenharmony_cistatic bool 1324bf215546Sopenharmony_ciemit_varying_flags(struct v3dv_job *job, 1325bf215546Sopenharmony_ci uint32_t num_flags, 1326bf215546Sopenharmony_ci const uint32_t *flags, 1327bf215546Sopenharmony_ci void (*flag_emit_callback)(struct v3dv_job *job, 1328bf215546Sopenharmony_ci int varying_offset, 1329bf215546Sopenharmony_ci uint32_t flags, 1330bf215546Sopenharmony_ci enum V3DX(Varying_Flags_Action) lower, 1331bf215546Sopenharmony_ci enum V3DX(Varying_Flags_Action) higher)) 1332bf215546Sopenharmony_ci{ 1333bf215546Sopenharmony_ci bool emitted_any = false; 1334bf215546Sopenharmony_ci for (int i = 0; i < num_flags; i++) { 1335bf215546Sopenharmony_ci if (!flags[i]) 1336bf215546Sopenharmony_ci continue; 1337bf215546Sopenharmony_ci 1338bf215546Sopenharmony_ci if (emitted_any) { 1339bf215546Sopenharmony_ci flag_emit_callback(job, i, flags[i], 1340bf215546Sopenharmony_ci V3D_VARYING_FLAGS_ACTION_UNCHANGED, 1341bf215546Sopenharmony_ci V3D_VARYING_FLAGS_ACTION_UNCHANGED); 1342bf215546Sopenharmony_ci } else if (i == 0) { 1343bf215546Sopenharmony_ci flag_emit_callback(job, i, flags[i], 1344bf215546Sopenharmony_ci V3D_VARYING_FLAGS_ACTION_UNCHANGED, 1345bf215546Sopenharmony_ci V3D_VARYING_FLAGS_ACTION_ZEROED); 1346bf215546Sopenharmony_ci } else { 1347bf215546Sopenharmony_ci flag_emit_callback(job, i, flags[i], 1348bf215546Sopenharmony_ci V3D_VARYING_FLAGS_ACTION_ZEROED, 1349bf215546Sopenharmony_ci V3D_VARYING_FLAGS_ACTION_ZEROED); 1350bf215546Sopenharmony_ci } 1351bf215546Sopenharmony_ci 1352bf215546Sopenharmony_ci emitted_any = true; 1353bf215546Sopenharmony_ci } 1354bf215546Sopenharmony_ci 1355bf215546Sopenharmony_ci return emitted_any; 1356bf215546Sopenharmony_ci} 1357bf215546Sopenharmony_ci 1358bf215546Sopenharmony_civoid 1359bf215546Sopenharmony_civ3dX(cmd_buffer_emit_varyings_state)(struct v3dv_cmd_buffer *cmd_buffer) 1360bf215546Sopenharmony_ci{ 1361bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 
1362bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 1363bf215546Sopenharmony_ci 1364bf215546Sopenharmony_ci struct v3d_fs_prog_data *prog_data_fs = 1365bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs; 1366bf215546Sopenharmony_ci 1367bf215546Sopenharmony_ci const uint32_t num_flags = 1368bf215546Sopenharmony_ci ARRAY_SIZE(prog_data_fs->flat_shade_flags); 1369bf215546Sopenharmony_ci const uint32_t *flat_shade_flags = prog_data_fs->flat_shade_flags; 1370bf215546Sopenharmony_ci const uint32_t *noperspective_flags = prog_data_fs->noperspective_flags; 1371bf215546Sopenharmony_ci const uint32_t *centroid_flags = prog_data_fs->centroid_flags; 1372bf215546Sopenharmony_ci 1373bf215546Sopenharmony_ci if (!emit_varying_flags(job, num_flags, flat_shade_flags, 1374bf215546Sopenharmony_ci emit_flat_shade_flags)) { 1375bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 1376bf215546Sopenharmony_ci &job->bcl, cl_packet_length(ZERO_ALL_FLAT_SHADE_FLAGS)); 1377bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_ci cl_emit(&job->bcl, ZERO_ALL_FLAT_SHADE_FLAGS, flags); 1380bf215546Sopenharmony_ci } 1381bf215546Sopenharmony_ci 1382bf215546Sopenharmony_ci if (!emit_varying_flags(job, num_flags, noperspective_flags, 1383bf215546Sopenharmony_ci emit_noperspective_flags)) { 1384bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 1385bf215546Sopenharmony_ci &job->bcl, cl_packet_length(ZERO_ALL_NON_PERSPECTIVE_FLAGS)); 1386bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 1387bf215546Sopenharmony_ci 1388bf215546Sopenharmony_ci cl_emit(&job->bcl, ZERO_ALL_NON_PERSPECTIVE_FLAGS, flags); 1389bf215546Sopenharmony_ci } 1390bf215546Sopenharmony_ci 1391bf215546Sopenharmony_ci if (!emit_varying_flags(job, num_flags, centroid_flags, 1392bf215546Sopenharmony_ci emit_centroid_flags)) { 1393bf215546Sopenharmony_ci 
v3dv_cl_ensure_space_with_branch( 1394bf215546Sopenharmony_ci &job->bcl, cl_packet_length(ZERO_ALL_CENTROID_FLAGS)); 1395bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 1396bf215546Sopenharmony_ci 1397bf215546Sopenharmony_ci cl_emit(&job->bcl, ZERO_ALL_CENTROID_FLAGS, flags); 1398bf215546Sopenharmony_ci } 1399bf215546Sopenharmony_ci} 1400bf215546Sopenharmony_ci 1401bf215546Sopenharmony_ci/* Updates job early Z state tracking. Returns False if EZ must be disabled 1402bf215546Sopenharmony_ci * for the current draw call. 1403bf215546Sopenharmony_ci */ 1404bf215546Sopenharmony_cistatic bool 1405bf215546Sopenharmony_cijob_update_ez_state(struct v3dv_job *job, 1406bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline, 1407bf215546Sopenharmony_ci struct v3dv_cmd_buffer *cmd_buffer) 1408bf215546Sopenharmony_ci{ 1409bf215546Sopenharmony_ci /* If first_ez_state is V3D_EZ_DISABLED it means that we have already 1410bf215546Sopenharmony_ci * determined that we should disable EZ completely for all draw calls in 1411bf215546Sopenharmony_ci * this job. This will cause us to disable EZ for the entire job in the 1412bf215546Sopenharmony_ci * Tile Rendering Mode RCL packet and when we do that we need to make sure 1413bf215546Sopenharmony_ci * we never emit a draw call in the job with EZ enabled in the CFG_BITS 1414bf215546Sopenharmony_ci * packet, so ez_state must also be V3D_EZ_DISABLED; 1415bf215546Sopenharmony_ci */ 1416bf215546Sopenharmony_ci if (job->first_ez_state == V3D_EZ_DISABLED) { 1417bf215546Sopenharmony_ci assert(job->ez_state == V3D_EZ_DISABLED); 1418bf215546Sopenharmony_ci return false; 1419bf215546Sopenharmony_ci } 1420bf215546Sopenharmony_ci 1421bf215546Sopenharmony_ci /* If ez_state is V3D_EZ_DISABLED it means that we have already decided 1422bf215546Sopenharmony_ci * that EZ must be disabled for the remaining of the frame. 
1423bf215546Sopenharmony_ci */ 1424bf215546Sopenharmony_ci if (job->ez_state == V3D_EZ_DISABLED) 1425bf215546Sopenharmony_ci return false; 1426bf215546Sopenharmony_ci 1427bf215546Sopenharmony_ci /* This is part of the pre draw call handling, so we should be inside a 1428bf215546Sopenharmony_ci * render pass. 1429bf215546Sopenharmony_ci */ 1430bf215546Sopenharmony_ci assert(cmd_buffer->state.pass); 1431bf215546Sopenharmony_ci 1432bf215546Sopenharmony_ci /* If this is the first time we update EZ state for this job we first check 1433bf215546Sopenharmony_ci * if there is anything that requires disabling it completely for the entire 1434bf215546Sopenharmony_ci * job (based on state that is not related to the current draw call and 1435bf215546Sopenharmony_ci * pipeline state). 1436bf215546Sopenharmony_ci */ 1437bf215546Sopenharmony_ci if (!job->decided_global_ez_enable) { 1438bf215546Sopenharmony_ci job->decided_global_ez_enable = true; 1439bf215546Sopenharmony_ci 1440bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1441bf215546Sopenharmony_ci assert(state->subpass_idx < state->pass->subpass_count); 1442bf215546Sopenharmony_ci struct v3dv_subpass *subpass = &state->pass->subpasses[state->subpass_idx]; 1443bf215546Sopenharmony_ci if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED) { 1444bf215546Sopenharmony_ci job->first_ez_state = V3D_EZ_DISABLED; 1445bf215546Sopenharmony_ci job->ez_state = V3D_EZ_DISABLED; 1446bf215546Sopenharmony_ci return false; 1447bf215546Sopenharmony_ci } 1448bf215546Sopenharmony_ci 1449bf215546Sopenharmony_ci /* GFXH-1918: the early-z buffer may load incorrect depth values 1450bf215546Sopenharmony_ci * if the frame has odd width or height. 1451bf215546Sopenharmony_ci * 1452bf215546Sopenharmony_ci * So we need to disable EZ in this case. 
1453bf215546Sopenharmony_ci */ 1454bf215546Sopenharmony_ci const struct v3dv_render_pass_attachment *ds_attachment = 1455bf215546Sopenharmony_ci &state->pass->attachments[subpass->ds_attachment.attachment]; 1456bf215546Sopenharmony_ci 1457bf215546Sopenharmony_ci const VkImageAspectFlags ds_aspects = 1458bf215546Sopenharmony_ci vk_format_aspects(ds_attachment->desc.format); 1459bf215546Sopenharmony_ci 1460bf215546Sopenharmony_ci bool needs_depth_load = 1461bf215546Sopenharmony_ci check_needs_load(state, 1462bf215546Sopenharmony_ci ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT, 1463bf215546Sopenharmony_ci ds_attachment->first_subpass, 1464bf215546Sopenharmony_ci ds_attachment->desc.loadOp); 1465bf215546Sopenharmony_ci 1466bf215546Sopenharmony_ci if (needs_depth_load) { 1467bf215546Sopenharmony_ci struct v3dv_framebuffer *fb = state->framebuffer; 1468bf215546Sopenharmony_ci 1469bf215546Sopenharmony_ci if (!fb) { 1470bf215546Sopenharmony_ci assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); 1471bf215546Sopenharmony_ci perf_debug("Loading depth aspect in a secondary command buffer " 1472bf215546Sopenharmony_ci "without framebuffer info disables early-z tests.\n"); 1473bf215546Sopenharmony_ci job->first_ez_state = V3D_EZ_DISABLED; 1474bf215546Sopenharmony_ci job->ez_state = V3D_EZ_DISABLED; 1475bf215546Sopenharmony_ci return false; 1476bf215546Sopenharmony_ci } 1477bf215546Sopenharmony_ci 1478bf215546Sopenharmony_ci if (((fb->width % 2) != 0 || (fb->height % 2) != 0)) { 1479bf215546Sopenharmony_ci perf_debug("Loading depth aspect for framebuffer with odd width " 1480bf215546Sopenharmony_ci "or height disables early-Z tests.\n"); 1481bf215546Sopenharmony_ci job->first_ez_state = V3D_EZ_DISABLED; 1482bf215546Sopenharmony_ci job->ez_state = V3D_EZ_DISABLED; 1483bf215546Sopenharmony_ci return false; 1484bf215546Sopenharmony_ci } 1485bf215546Sopenharmony_ci } 1486bf215546Sopenharmony_ci } 1487bf215546Sopenharmony_ci 1488bf215546Sopenharmony_ci /* Otherwise, we can 
decide to selectively enable or disable EZ for draw 1489bf215546Sopenharmony_ci * calls using the CFG_BITS packet based on the bound pipeline state. 1490bf215546Sopenharmony_ci */ 1491bf215546Sopenharmony_ci bool disable_ez = false; 1492bf215546Sopenharmony_ci bool incompatible_test = false; 1493bf215546Sopenharmony_ci switch (pipeline->ez_state) { 1494bf215546Sopenharmony_ci case V3D_EZ_UNDECIDED: 1495bf215546Sopenharmony_ci /* If the pipeline didn't pick a direction but didn't disable, then go 1496bf215546Sopenharmony_ci * along with the current EZ state. This allows EZ optimization for Z 1497bf215546Sopenharmony_ci * func == EQUAL or NEVER. 1498bf215546Sopenharmony_ci */ 1499bf215546Sopenharmony_ci break; 1500bf215546Sopenharmony_ci 1501bf215546Sopenharmony_ci case V3D_EZ_LT_LE: 1502bf215546Sopenharmony_ci case V3D_EZ_GT_GE: 1503bf215546Sopenharmony_ci /* If the pipeline picked a direction, then it needs to match the current 1504bf215546Sopenharmony_ci * direction if we've decided on one. 
1505bf215546Sopenharmony_ci */ 1506bf215546Sopenharmony_ci if (job->ez_state == V3D_EZ_UNDECIDED) { 1507bf215546Sopenharmony_ci job->ez_state = pipeline->ez_state; 1508bf215546Sopenharmony_ci } else if (job->ez_state != pipeline->ez_state) { 1509bf215546Sopenharmony_ci disable_ez = true; 1510bf215546Sopenharmony_ci incompatible_test = true; 1511bf215546Sopenharmony_ci } 1512bf215546Sopenharmony_ci break; 1513bf215546Sopenharmony_ci 1514bf215546Sopenharmony_ci case V3D_EZ_DISABLED: 1515bf215546Sopenharmony_ci disable_ez = true; 1516bf215546Sopenharmony_ci incompatible_test = pipeline->incompatible_ez_test; 1517bf215546Sopenharmony_ci break; 1518bf215546Sopenharmony_ci } 1519bf215546Sopenharmony_ci 1520bf215546Sopenharmony_ci if (job->first_ez_state == V3D_EZ_UNDECIDED && !disable_ez) { 1521bf215546Sopenharmony_ci assert(job->ez_state != V3D_EZ_DISABLED); 1522bf215546Sopenharmony_ci job->first_ez_state = job->ez_state; 1523bf215546Sopenharmony_ci } 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_ci /* If we had to disable EZ because of an incompatible test direction and 1526bf215546Sopenharmony_ci * and the pipeline writes depth then we need to disable EZ for the rest of 1527bf215546Sopenharmony_ci * the frame. 
1528bf215546Sopenharmony_ci */ 1529bf215546Sopenharmony_ci if (incompatible_test && pipeline->z_updates_enable) { 1530bf215546Sopenharmony_ci assert(disable_ez); 1531bf215546Sopenharmony_ci job->ez_state = V3D_EZ_DISABLED; 1532bf215546Sopenharmony_ci } 1533bf215546Sopenharmony_ci 1534bf215546Sopenharmony_ci if (!disable_ez) 1535bf215546Sopenharmony_ci job->has_ez_draws = true; 1536bf215546Sopenharmony_ci 1537bf215546Sopenharmony_ci return !disable_ez; 1538bf215546Sopenharmony_ci} 1539bf215546Sopenharmony_ci 1540bf215546Sopenharmony_civoid 1541bf215546Sopenharmony_civ3dX(cmd_buffer_emit_configuration_bits)(struct v3dv_cmd_buffer *cmd_buffer) 1542bf215546Sopenharmony_ci{ 1543bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 1544bf215546Sopenharmony_ci assert(job); 1545bf215546Sopenharmony_ci 1546bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 1547bf215546Sopenharmony_ci assert(pipeline); 1548bf215546Sopenharmony_ci 1549bf215546Sopenharmony_ci bool enable_ez = job_update_ez_state(job, pipeline, cmd_buffer); 1550bf215546Sopenharmony_ci 1551bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, cl_packet_length(CFG_BITS)); 1552bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_ci cl_emit_with_prepacked(&job->bcl, CFG_BITS, pipeline->cfg_bits, config) { 1555bf215546Sopenharmony_ci config.early_z_enable = enable_ez; 1556bf215546Sopenharmony_ci config.early_z_updates_enable = config.early_z_enable && 1557bf215546Sopenharmony_ci pipeline->z_updates_enable; 1558bf215546Sopenharmony_ci } 1559bf215546Sopenharmony_ci} 1560bf215546Sopenharmony_ci 1561bf215546Sopenharmony_civoid 1562bf215546Sopenharmony_civ3dX(cmd_buffer_emit_occlusion_query)(struct v3dv_cmd_buffer *cmd_buffer) 1563bf215546Sopenharmony_ci{ 1564bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 1565bf215546Sopenharmony_ci assert(job); 
1566bf215546Sopenharmony_ci 1567bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, 1568bf215546Sopenharmony_ci cl_packet_length(OCCLUSION_QUERY_COUNTER)); 1569bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 1570bf215546Sopenharmony_ci 1571bf215546Sopenharmony_ci cl_emit(&job->bcl, OCCLUSION_QUERY_COUNTER, counter) { 1572bf215546Sopenharmony_ci if (cmd_buffer->state.query.active_query.bo) { 1573bf215546Sopenharmony_ci counter.address = 1574bf215546Sopenharmony_ci v3dv_cl_address(cmd_buffer->state.query.active_query.bo, 1575bf215546Sopenharmony_ci cmd_buffer->state.query.active_query.offset); 1576bf215546Sopenharmony_ci } 1577bf215546Sopenharmony_ci } 1578bf215546Sopenharmony_ci 1579bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_OCCLUSION_QUERY; 1580bf215546Sopenharmony_ci} 1581bf215546Sopenharmony_ci 1582bf215546Sopenharmony_cistatic struct v3dv_job * 1583bf215546Sopenharmony_cicmd_buffer_subpass_split_for_barrier(struct v3dv_cmd_buffer *cmd_buffer, 1584bf215546Sopenharmony_ci bool is_bcl_barrier) 1585bf215546Sopenharmony_ci{ 1586bf215546Sopenharmony_ci assert(cmd_buffer->state.subpass_idx != -1); 1587bf215546Sopenharmony_ci v3dv_cmd_buffer_finish_job(cmd_buffer); 1588bf215546Sopenharmony_ci struct v3dv_job *job = 1589bf215546Sopenharmony_ci v3dv_cmd_buffer_subpass_resume(cmd_buffer, 1590bf215546Sopenharmony_ci cmd_buffer->state.subpass_idx); 1591bf215546Sopenharmony_ci if (!job) 1592bf215546Sopenharmony_ci return NULL; 1593bf215546Sopenharmony_ci 1594bf215546Sopenharmony_ci /* FIXME: we can do better than all barriers */ 1595bf215546Sopenharmony_ci job->serialize = V3DV_BARRIER_ALL; 1596bf215546Sopenharmony_ci job->needs_bcl_sync = is_bcl_barrier; 1597bf215546Sopenharmony_ci return job; 1598bf215546Sopenharmony_ci} 1599bf215546Sopenharmony_ci 1600bf215546Sopenharmony_cistatic void 1601bf215546Sopenharmony_cicmd_buffer_copy_secondary_end_query_state(struct v3dv_cmd_buffer *primary, 1602bf215546Sopenharmony_ci struct 
v3dv_cmd_buffer *secondary) 1603bf215546Sopenharmony_ci{ 1604bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *p_state = &primary->state; 1605bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *s_state = &secondary->state; 1606bf215546Sopenharmony_ci 1607bf215546Sopenharmony_ci const uint32_t total_state_count = 1608bf215546Sopenharmony_ci p_state->query.end.used_count + s_state->query.end.used_count; 1609bf215546Sopenharmony_ci v3dv_cmd_buffer_ensure_array_state(primary, 1610bf215546Sopenharmony_ci sizeof(struct v3dv_end_query_cpu_job_info), 1611bf215546Sopenharmony_ci total_state_count, 1612bf215546Sopenharmony_ci &p_state->query.end.alloc_count, 1613bf215546Sopenharmony_ci (void **) &p_state->query.end.states); 1614bf215546Sopenharmony_ci v3dv_return_if_oom(primary, NULL); 1615bf215546Sopenharmony_ci 1616bf215546Sopenharmony_ci for (uint32_t i = 0; i < s_state->query.end.used_count; i++) { 1617bf215546Sopenharmony_ci const struct v3dv_end_query_cpu_job_info *s_qstate = 1618bf215546Sopenharmony_ci &secondary->state.query.end.states[i]; 1619bf215546Sopenharmony_ci 1620bf215546Sopenharmony_ci struct v3dv_end_query_cpu_job_info *p_qstate = 1621bf215546Sopenharmony_ci &p_state->query.end.states[p_state->query.end.used_count++]; 1622bf215546Sopenharmony_ci 1623bf215546Sopenharmony_ci p_qstate->pool = s_qstate->pool; 1624bf215546Sopenharmony_ci p_qstate->query = s_qstate->query; 1625bf215546Sopenharmony_ci } 1626bf215546Sopenharmony_ci} 1627bf215546Sopenharmony_ci 1628bf215546Sopenharmony_civoid 1629bf215546Sopenharmony_civ3dX(cmd_buffer_execute_inside_pass)(struct v3dv_cmd_buffer *primary, 1630bf215546Sopenharmony_ci uint32_t cmd_buffer_count, 1631bf215546Sopenharmony_ci const VkCommandBuffer *cmd_buffers) 1632bf215546Sopenharmony_ci{ 1633bf215546Sopenharmony_ci assert(primary->state.job); 1634bf215546Sopenharmony_ci 1635bf215546Sopenharmony_ci /* Emit occlusion query state if needed so the draw calls inside our 1636bf215546Sopenharmony_ci * secondaries update 
the counters. 1637bf215546Sopenharmony_ci */ 1638bf215546Sopenharmony_ci bool has_occlusion_query = 1639bf215546Sopenharmony_ci primary->state.dirty & V3DV_CMD_DIRTY_OCCLUSION_QUERY; 1640bf215546Sopenharmony_ci if (has_occlusion_query) 1641bf215546Sopenharmony_ci v3dX(cmd_buffer_emit_occlusion_query)(primary); 1642bf215546Sopenharmony_ci 1643bf215546Sopenharmony_ci /* FIXME: if our primary job tiling doesn't enable MSSA but any of the 1644bf215546Sopenharmony_ci * pipelines used by the secondaries do, we need to re-start the primary 1645bf215546Sopenharmony_ci * job to enable MSAA. See cmd_buffer_restart_job_for_msaa_if_needed. 1646bf215546Sopenharmony_ci */ 1647bf215546Sopenharmony_ci struct v3dv_barrier_state pending_barrier = { 0 }; 1648bf215546Sopenharmony_ci for (uint32_t i = 0; i < cmd_buffer_count; i++) { 1649bf215546Sopenharmony_ci V3DV_FROM_HANDLE(v3dv_cmd_buffer, secondary, cmd_buffers[i]); 1650bf215546Sopenharmony_ci 1651bf215546Sopenharmony_ci assert(secondary->usage_flags & 1652bf215546Sopenharmony_ci VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT); 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_ci list_for_each_entry(struct v3dv_job, secondary_job, 1655bf215546Sopenharmony_ci &secondary->jobs, list_link) { 1656bf215546Sopenharmony_ci if (secondary_job->type == V3DV_JOB_TYPE_GPU_CL_SECONDARY) { 1657bf215546Sopenharmony_ci /* If the job is a CL, then we branch to it from the primary BCL. 1658bf215546Sopenharmony_ci * In this case the secondary's BCL is finished with a 1659bf215546Sopenharmony_ci * RETURN_FROM_SUB_LIST command to return back to the primary BCL 1660bf215546Sopenharmony_ci * once we are done executing it. 
1661bf215546Sopenharmony_ci */ 1662bf215546Sopenharmony_ci assert(v3dv_cl_offset(&secondary_job->rcl) == 0); 1663bf215546Sopenharmony_ci assert(secondary_job->bcl.bo); 1664bf215546Sopenharmony_ci 1665bf215546Sopenharmony_ci /* Sanity check that secondary BCL ends with RETURN_FROM_SUB_LIST */ 1666bf215546Sopenharmony_ci STATIC_ASSERT(cl_packet_length(RETURN_FROM_SUB_LIST) == 1); 1667bf215546Sopenharmony_ci assert(v3dv_cl_offset(&secondary_job->bcl) >= 1); 1668bf215546Sopenharmony_ci assert(*(((uint8_t *)secondary_job->bcl.next) - 1) == 1669bf215546Sopenharmony_ci V3DX(RETURN_FROM_SUB_LIST_opcode)); 1670bf215546Sopenharmony_ci 1671bf215546Sopenharmony_ci /* If this secondary has any barriers (or we had any pending barrier 1672bf215546Sopenharmony_ci * to apply), then we can't just branch to it from the primary, we 1673bf215546Sopenharmony_ci * need to split the primary to create a new job that can consume 1674bf215546Sopenharmony_ci * the barriers first. 1675bf215546Sopenharmony_ci * 1676bf215546Sopenharmony_ci * FIXME: in this case, maybe just copy the secondary BCL without 1677bf215546Sopenharmony_ci * the RETURN_FROM_SUB_LIST into the primary job to skip the 1678bf215546Sopenharmony_ci * branch? 
1679bf215546Sopenharmony_ci */ 1680bf215546Sopenharmony_ci struct v3dv_job *primary_job = primary->state.job; 1681bf215546Sopenharmony_ci if (!primary_job || secondary_job->serialize || 1682bf215546Sopenharmony_ci pending_barrier.dst_mask) { 1683bf215546Sopenharmony_ci const bool needs_bcl_barrier = 1684bf215546Sopenharmony_ci secondary_job->needs_bcl_sync || 1685bf215546Sopenharmony_ci pending_barrier.bcl_buffer_access || 1686bf215546Sopenharmony_ci pending_barrier.bcl_image_access; 1687bf215546Sopenharmony_ci 1688bf215546Sopenharmony_ci primary_job = 1689bf215546Sopenharmony_ci cmd_buffer_subpass_split_for_barrier(primary, 1690bf215546Sopenharmony_ci needs_bcl_barrier); 1691bf215546Sopenharmony_ci v3dv_return_if_oom(primary, NULL); 1692bf215546Sopenharmony_ci 1693bf215546Sopenharmony_ci /* Since we have created a new primary we need to re-emit 1694bf215546Sopenharmony_ci * occlusion query state. 1695bf215546Sopenharmony_ci */ 1696bf215546Sopenharmony_ci if (has_occlusion_query) 1697bf215546Sopenharmony_ci v3dX(cmd_buffer_emit_occlusion_query)(primary); 1698bf215546Sopenharmony_ci } 1699bf215546Sopenharmony_ci 1700bf215546Sopenharmony_ci /* Make sure our primary job has all required BO references */ 1701bf215546Sopenharmony_ci set_foreach(secondary_job->bos, entry) { 1702bf215546Sopenharmony_ci struct v3dv_bo *bo = (struct v3dv_bo *)entry->key; 1703bf215546Sopenharmony_ci v3dv_job_add_bo(primary_job, bo); 1704bf215546Sopenharmony_ci } 1705bf215546Sopenharmony_ci 1706bf215546Sopenharmony_ci /* Emit required branch instructions. We expect each of these 1707bf215546Sopenharmony_ci * to end with a corresponding 'return from sub list' item. 
1708bf215546Sopenharmony_ci */ 1709bf215546Sopenharmony_ci list_for_each_entry(struct v3dv_bo, bcl_bo, 1710bf215546Sopenharmony_ci &secondary_job->bcl.bo_list, list_link) { 1711bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&primary_job->bcl, 1712bf215546Sopenharmony_ci cl_packet_length(BRANCH_TO_SUB_LIST)); 1713bf215546Sopenharmony_ci v3dv_return_if_oom(primary, NULL); 1714bf215546Sopenharmony_ci cl_emit(&primary_job->bcl, BRANCH_TO_SUB_LIST, branch) { 1715bf215546Sopenharmony_ci branch.address = v3dv_cl_address(bcl_bo, 0); 1716bf215546Sopenharmony_ci } 1717bf215546Sopenharmony_ci } 1718bf215546Sopenharmony_ci 1719bf215546Sopenharmony_ci primary_job->tmu_dirty_rcl |= secondary_job->tmu_dirty_rcl; 1720bf215546Sopenharmony_ci } else { 1721bf215546Sopenharmony_ci /* This is a regular job (CPU or GPU), so just finish the current 1722bf215546Sopenharmony_ci * primary job (if any) and then add the secondary job to the 1723bf215546Sopenharmony_ci * primary's job list right after it. 1724bf215546Sopenharmony_ci */ 1725bf215546Sopenharmony_ci v3dv_cmd_buffer_finish_job(primary); 1726bf215546Sopenharmony_ci v3dv_job_clone_in_cmd_buffer(secondary_job, primary); 1727bf215546Sopenharmony_ci if (pending_barrier.dst_mask) { 1728bf215546Sopenharmony_ci /* FIXME: do the same we do for primaries and only choose the 1729bf215546Sopenharmony_ci * relevant src masks. 
1730bf215546Sopenharmony_ci */ 1731bf215546Sopenharmony_ci secondary_job->serialize = pending_barrier.src_mask_graphics | 1732bf215546Sopenharmony_ci pending_barrier.src_mask_transfer | 1733bf215546Sopenharmony_ci pending_barrier.src_mask_compute; 1734bf215546Sopenharmony_ci if (pending_barrier.bcl_buffer_access || 1735bf215546Sopenharmony_ci pending_barrier.bcl_image_access) { 1736bf215546Sopenharmony_ci secondary_job->needs_bcl_sync = true; 1737bf215546Sopenharmony_ci } 1738bf215546Sopenharmony_ci } 1739bf215546Sopenharmony_ci } 1740bf215546Sopenharmony_ci 1741bf215546Sopenharmony_ci memset(&pending_barrier, 0, sizeof(pending_barrier)); 1742bf215546Sopenharmony_ci } 1743bf215546Sopenharmony_ci 1744bf215546Sopenharmony_ci /* If the secondary has recorded any vkCmdEndQuery commands, we need to 1745bf215546Sopenharmony_ci * copy this state to the primary so it is processed properly when the 1746bf215546Sopenharmony_ci * current primary job is finished. 1747bf215546Sopenharmony_ci */ 1748bf215546Sopenharmony_ci cmd_buffer_copy_secondary_end_query_state(primary, secondary); 1749bf215546Sopenharmony_ci 1750bf215546Sopenharmony_ci /* If this secondary had any pending barrier state we will need that 1751bf215546Sopenharmony_ci * barrier state consumed with whatever comes next in the primary. 
1752bf215546Sopenharmony_ci */ 1753bf215546Sopenharmony_ci assert(secondary->state.barrier.dst_mask || 1754bf215546Sopenharmony_ci (!secondary->state.barrier.bcl_buffer_access && 1755bf215546Sopenharmony_ci !secondary->state.barrier.bcl_image_access)); 1756bf215546Sopenharmony_ci 1757bf215546Sopenharmony_ci pending_barrier = secondary->state.barrier; 1758bf215546Sopenharmony_ci } 1759bf215546Sopenharmony_ci 1760bf215546Sopenharmony_ci if (pending_barrier.dst_mask) { 1761bf215546Sopenharmony_ci v3dv_cmd_buffer_merge_barrier_state(&primary->state.barrier, 1762bf215546Sopenharmony_ci &pending_barrier); 1763bf215546Sopenharmony_ci } 1764bf215546Sopenharmony_ci} 1765bf215546Sopenharmony_ci 1766bf215546Sopenharmony_cistatic void 1767bf215546Sopenharmony_ciemit_gs_shader_state_record(struct v3dv_job *job, 1768bf215546Sopenharmony_ci struct v3dv_bo *assembly_bo, 1769bf215546Sopenharmony_ci struct v3dv_shader_variant *gs_bin, 1770bf215546Sopenharmony_ci struct v3dv_cl_reloc gs_bin_uniforms, 1771bf215546Sopenharmony_ci struct v3dv_shader_variant *gs, 1772bf215546Sopenharmony_ci struct v3dv_cl_reloc gs_render_uniforms) 1773bf215546Sopenharmony_ci{ 1774bf215546Sopenharmony_ci cl_emit(&job->indirect, GEOMETRY_SHADER_STATE_RECORD, shader) { 1775bf215546Sopenharmony_ci shader.geometry_bin_mode_shader_code_address = 1776bf215546Sopenharmony_ci v3dv_cl_address(assembly_bo, gs_bin->assembly_offset); 1777bf215546Sopenharmony_ci shader.geometry_bin_mode_shader_4_way_threadable = 1778bf215546Sopenharmony_ci gs_bin->prog_data.gs->base.threads == 4; 1779bf215546Sopenharmony_ci shader.geometry_bin_mode_shader_start_in_final_thread_section = 1780bf215546Sopenharmony_ci gs_bin->prog_data.gs->base.single_seg; 1781bf215546Sopenharmony_ci shader.geometry_bin_mode_shader_propagate_nans = true; 1782bf215546Sopenharmony_ci shader.geometry_bin_mode_shader_uniforms_address = 1783bf215546Sopenharmony_ci gs_bin_uniforms; 1784bf215546Sopenharmony_ci 1785bf215546Sopenharmony_ci 
shader.geometry_render_mode_shader_code_address = 1786bf215546Sopenharmony_ci v3dv_cl_address(assembly_bo, gs->assembly_offset); 1787bf215546Sopenharmony_ci shader.geometry_render_mode_shader_4_way_threadable = 1788bf215546Sopenharmony_ci gs->prog_data.gs->base.threads == 4; 1789bf215546Sopenharmony_ci shader.geometry_render_mode_shader_start_in_final_thread_section = 1790bf215546Sopenharmony_ci gs->prog_data.gs->base.single_seg; 1791bf215546Sopenharmony_ci shader.geometry_render_mode_shader_propagate_nans = true; 1792bf215546Sopenharmony_ci shader.geometry_render_mode_shader_uniforms_address = 1793bf215546Sopenharmony_ci gs_render_uniforms; 1794bf215546Sopenharmony_ci } 1795bf215546Sopenharmony_ci} 1796bf215546Sopenharmony_ci 1797bf215546Sopenharmony_cistatic uint8_t 1798bf215546Sopenharmony_civ3d_gs_output_primitive(enum shader_prim prim_type) 1799bf215546Sopenharmony_ci{ 1800bf215546Sopenharmony_ci switch (prim_type) { 1801bf215546Sopenharmony_ci case SHADER_PRIM_POINTS: 1802bf215546Sopenharmony_ci return GEOMETRY_SHADER_POINTS; 1803bf215546Sopenharmony_ci case SHADER_PRIM_LINE_STRIP: 1804bf215546Sopenharmony_ci return GEOMETRY_SHADER_LINE_STRIP; 1805bf215546Sopenharmony_ci case SHADER_PRIM_TRIANGLE_STRIP: 1806bf215546Sopenharmony_ci return GEOMETRY_SHADER_TRI_STRIP; 1807bf215546Sopenharmony_ci default: 1808bf215546Sopenharmony_ci unreachable("Unsupported primitive type"); 1809bf215546Sopenharmony_ci } 1810bf215546Sopenharmony_ci} 1811bf215546Sopenharmony_ci 1812bf215546Sopenharmony_cistatic void 1813bf215546Sopenharmony_ciemit_tes_gs_common_params(struct v3dv_job *job, 1814bf215546Sopenharmony_ci uint8_t gs_out_prim_type, 1815bf215546Sopenharmony_ci uint8_t gs_num_invocations) 1816bf215546Sopenharmony_ci{ 1817bf215546Sopenharmony_ci cl_emit(&job->indirect, TESSELLATION_GEOMETRY_COMMON_PARAMS, shader) { 1818bf215546Sopenharmony_ci shader.tessellation_type = TESSELLATION_TYPE_TRIANGLE; 1819bf215546Sopenharmony_ci shader.tessellation_point_mode = false; 
1820bf215546Sopenharmony_ci shader.tessellation_edge_spacing = TESSELLATION_EDGE_SPACING_EVEN; 1821bf215546Sopenharmony_ci shader.tessellation_clockwise = true; 1822bf215546Sopenharmony_ci shader.tessellation_invocations = 1; 1823bf215546Sopenharmony_ci 1824bf215546Sopenharmony_ci shader.geometry_shader_output_format = 1825bf215546Sopenharmony_ci v3d_gs_output_primitive(gs_out_prim_type); 1826bf215546Sopenharmony_ci shader.geometry_shader_instances = gs_num_invocations & 0x1F; 1827bf215546Sopenharmony_ci } 1828bf215546Sopenharmony_ci} 1829bf215546Sopenharmony_ci 1830bf215546Sopenharmony_cistatic uint8_t 1831bf215546Sopenharmony_cisimd_width_to_gs_pack_mode(uint32_t width) 1832bf215546Sopenharmony_ci{ 1833bf215546Sopenharmony_ci switch (width) { 1834bf215546Sopenharmony_ci case 16: 1835bf215546Sopenharmony_ci return V3D_PACK_MODE_16_WAY; 1836bf215546Sopenharmony_ci case 8: 1837bf215546Sopenharmony_ci return V3D_PACK_MODE_8_WAY; 1838bf215546Sopenharmony_ci case 4: 1839bf215546Sopenharmony_ci return V3D_PACK_MODE_4_WAY; 1840bf215546Sopenharmony_ci case 1: 1841bf215546Sopenharmony_ci return V3D_PACK_MODE_1_WAY; 1842bf215546Sopenharmony_ci default: 1843bf215546Sopenharmony_ci unreachable("Invalid SIMD width"); 1844bf215546Sopenharmony_ci }; 1845bf215546Sopenharmony_ci} 1846bf215546Sopenharmony_ci 1847bf215546Sopenharmony_cistatic void 1848bf215546Sopenharmony_ciemit_tes_gs_shader_params(struct v3dv_job *job, 1849bf215546Sopenharmony_ci uint32_t gs_simd, 1850bf215546Sopenharmony_ci uint32_t gs_vpm_output_size, 1851bf215546Sopenharmony_ci uint32_t gs_max_vpm_input_size_per_batch) 1852bf215546Sopenharmony_ci{ 1853bf215546Sopenharmony_ci cl_emit(&job->indirect, TESSELLATION_GEOMETRY_SHADER_PARAMS, shader) { 1854bf215546Sopenharmony_ci shader.tcs_batch_flush_mode = V3D_TCS_FLUSH_MODE_FULLY_PACKED; 1855bf215546Sopenharmony_ci shader.per_patch_data_column_depth = 1; 1856bf215546Sopenharmony_ci shader.tcs_output_segment_size_in_sectors = 1; 1857bf215546Sopenharmony_ci 
shader.tcs_output_segment_pack_mode = V3D_PACK_MODE_16_WAY; 1858bf215546Sopenharmony_ci shader.tes_output_segment_size_in_sectors = 1; 1859bf215546Sopenharmony_ci shader.tes_output_segment_pack_mode = V3D_PACK_MODE_16_WAY; 1860bf215546Sopenharmony_ci shader.gs_output_segment_size_in_sectors = gs_vpm_output_size; 1861bf215546Sopenharmony_ci shader.gs_output_segment_pack_mode = 1862bf215546Sopenharmony_ci simd_width_to_gs_pack_mode(gs_simd); 1863bf215546Sopenharmony_ci shader.tbg_max_patches_per_tcs_batch = 1; 1864bf215546Sopenharmony_ci shader.tbg_max_extra_vertex_segs_for_patches_after_first = 0; 1865bf215546Sopenharmony_ci shader.tbg_min_tcs_output_segments_required_in_play = 1; 1866bf215546Sopenharmony_ci shader.tbg_min_per_patch_data_segments_required_in_play = 1; 1867bf215546Sopenharmony_ci shader.tpg_max_patches_per_tes_batch = 1; 1868bf215546Sopenharmony_ci shader.tpg_max_vertex_segments_per_tes_batch = 0; 1869bf215546Sopenharmony_ci shader.tpg_max_tcs_output_segments_per_tes_batch = 1; 1870bf215546Sopenharmony_ci shader.tpg_min_tes_output_segments_required_in_play = 1; 1871bf215546Sopenharmony_ci shader.gbg_max_tes_output_vertex_segments_per_gs_batch = 1872bf215546Sopenharmony_ci gs_max_vpm_input_size_per_batch; 1873bf215546Sopenharmony_ci shader.gbg_min_gs_output_segments_required_in_play = 1; 1874bf215546Sopenharmony_ci } 1875bf215546Sopenharmony_ci} 1876bf215546Sopenharmony_ci 1877bf215546Sopenharmony_civoid 1878bf215546Sopenharmony_civ3dX(cmd_buffer_emit_gl_shader_state)(struct v3dv_cmd_buffer *cmd_buffer) 1879bf215546Sopenharmony_ci{ 1880bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 1881bf215546Sopenharmony_ci assert(job); 1882bf215546Sopenharmony_ci 1883bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 1884bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = state->gfx.pipeline; 1885bf215546Sopenharmony_ci assert(pipeline); 1886bf215546Sopenharmony_ci 1887bf215546Sopenharmony_ci struct 
v3dv_shader_variant *vs_variant = 1888bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]; 1889bf215546Sopenharmony_ci struct v3d_vs_prog_data *prog_data_vs = vs_variant->prog_data.vs; 1890bf215546Sopenharmony_ci 1891bf215546Sopenharmony_ci struct v3dv_shader_variant *vs_bin_variant = 1892bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]; 1893bf215546Sopenharmony_ci struct v3d_vs_prog_data *prog_data_vs_bin = vs_bin_variant->prog_data.vs; 1894bf215546Sopenharmony_ci 1895bf215546Sopenharmony_ci struct v3dv_shader_variant *fs_variant = 1896bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]; 1897bf215546Sopenharmony_ci struct v3d_fs_prog_data *prog_data_fs = fs_variant->prog_data.fs; 1898bf215546Sopenharmony_ci 1899bf215546Sopenharmony_ci struct v3dv_shader_variant *gs_variant = NULL; 1900bf215546Sopenharmony_ci struct v3dv_shader_variant *gs_bin_variant = NULL; 1901bf215546Sopenharmony_ci struct v3d_gs_prog_data *prog_data_gs = NULL; 1902bf215546Sopenharmony_ci struct v3d_gs_prog_data *prog_data_gs_bin = NULL; 1903bf215546Sopenharmony_ci if (pipeline->has_gs) { 1904bf215546Sopenharmony_ci gs_variant = 1905bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]; 1906bf215546Sopenharmony_ci prog_data_gs = gs_variant->prog_data.gs; 1907bf215546Sopenharmony_ci 1908bf215546Sopenharmony_ci gs_bin_variant = 1909bf215546Sopenharmony_ci pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]; 1910bf215546Sopenharmony_ci prog_data_gs_bin = gs_bin_variant->prog_data.gs; 1911bf215546Sopenharmony_ci } 1912bf215546Sopenharmony_ci 1913bf215546Sopenharmony_ci /* Update the cache dirty flag based on the shader progs data */ 1914bf215546Sopenharmony_ci job->tmu_dirty_rcl |= prog_data_vs_bin->base.tmu_dirty_rcl; 1915bf215546Sopenharmony_ci job->tmu_dirty_rcl |= prog_data_vs->base.tmu_dirty_rcl; 1916bf215546Sopenharmony_ci job->tmu_dirty_rcl |= 
prog_data_fs->base.tmu_dirty_rcl; 1917bf215546Sopenharmony_ci if (pipeline->has_gs) { 1918bf215546Sopenharmony_ci job->tmu_dirty_rcl |= prog_data_gs_bin->base.tmu_dirty_rcl; 1919bf215546Sopenharmony_ci job->tmu_dirty_rcl |= prog_data_gs->base.tmu_dirty_rcl; 1920bf215546Sopenharmony_ci } 1921bf215546Sopenharmony_ci 1922bf215546Sopenharmony_ci /* See GFXH-930 workaround below */ 1923bf215546Sopenharmony_ci uint32_t num_elements_to_emit = MAX2(pipeline->va_count, 1); 1924bf215546Sopenharmony_ci 1925bf215546Sopenharmony_ci uint32_t shader_state_record_length = 1926bf215546Sopenharmony_ci cl_packet_length(GL_SHADER_STATE_RECORD); 1927bf215546Sopenharmony_ci if (pipeline->has_gs) { 1928bf215546Sopenharmony_ci shader_state_record_length += 1929bf215546Sopenharmony_ci cl_packet_length(GEOMETRY_SHADER_STATE_RECORD) + 1930bf215546Sopenharmony_ci cl_packet_length(TESSELLATION_GEOMETRY_COMMON_PARAMS) + 1931bf215546Sopenharmony_ci 2 * cl_packet_length(TESSELLATION_GEOMETRY_SHADER_PARAMS); 1932bf215546Sopenharmony_ci } 1933bf215546Sopenharmony_ci 1934bf215546Sopenharmony_ci uint32_t shader_rec_offset = 1935bf215546Sopenharmony_ci v3dv_cl_ensure_space(&job->indirect, 1936bf215546Sopenharmony_ci shader_state_record_length + 1937bf215546Sopenharmony_ci num_elements_to_emit * 1938bf215546Sopenharmony_ci cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD), 1939bf215546Sopenharmony_ci 32); 1940bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 1941bf215546Sopenharmony_ci 1942bf215546Sopenharmony_ci struct v3dv_bo *assembly_bo = pipeline->shared_data->assembly_bo; 1943bf215546Sopenharmony_ci 1944bf215546Sopenharmony_ci if (pipeline->has_gs) { 1945bf215546Sopenharmony_ci emit_gs_shader_state_record(job, 1946bf215546Sopenharmony_ci assembly_bo, 1947bf215546Sopenharmony_ci gs_bin_variant, 1948bf215546Sopenharmony_ci cmd_buffer->state.uniforms.gs_bin, 1949bf215546Sopenharmony_ci gs_variant, 1950bf215546Sopenharmony_ci cmd_buffer->state.uniforms.gs); 1951bf215546Sopenharmony_ci 
1952bf215546Sopenharmony_ci emit_tes_gs_common_params(job, 1953bf215546Sopenharmony_ci prog_data_gs->out_prim_type, 1954bf215546Sopenharmony_ci prog_data_gs->num_invocations); 1955bf215546Sopenharmony_ci 1956bf215546Sopenharmony_ci emit_tes_gs_shader_params(job, 1957bf215546Sopenharmony_ci pipeline->vpm_cfg_bin.gs_width, 1958bf215546Sopenharmony_ci pipeline->vpm_cfg_bin.Gd, 1959bf215546Sopenharmony_ci pipeline->vpm_cfg_bin.Gv); 1960bf215546Sopenharmony_ci 1961bf215546Sopenharmony_ci emit_tes_gs_shader_params(job, 1962bf215546Sopenharmony_ci pipeline->vpm_cfg.gs_width, 1963bf215546Sopenharmony_ci pipeline->vpm_cfg.Gd, 1964bf215546Sopenharmony_ci pipeline->vpm_cfg.Gv); 1965bf215546Sopenharmony_ci } 1966bf215546Sopenharmony_ci 1967bf215546Sopenharmony_ci struct v3dv_bo *default_attribute_values = 1968bf215546Sopenharmony_ci pipeline->default_attribute_values != NULL ? 1969bf215546Sopenharmony_ci pipeline->default_attribute_values : 1970bf215546Sopenharmony_ci pipeline->device->default_attribute_float; 1971bf215546Sopenharmony_ci 1972bf215546Sopenharmony_ci cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_RECORD, 1973bf215546Sopenharmony_ci pipeline->shader_state_record, shader) { 1974bf215546Sopenharmony_ci 1975bf215546Sopenharmony_ci /* FIXME: we are setting this values here and during the 1976bf215546Sopenharmony_ci * prepacking. This is because both cl_emit_with_prepacked and v3dvx_pack 1977bf215546Sopenharmony_ci * asserts for minimum values of these. 
It would be good to get 1978bf215546Sopenharmony_ci * v3dvx_pack to assert on the final value if possible 1979bf215546Sopenharmony_ci */ 1980bf215546Sopenharmony_ci shader.min_coord_shader_input_segments_required_in_play = 1981bf215546Sopenharmony_ci pipeline->vpm_cfg_bin.As; 1982bf215546Sopenharmony_ci shader.min_vertex_shader_input_segments_required_in_play = 1983bf215546Sopenharmony_ci pipeline->vpm_cfg.As; 1984bf215546Sopenharmony_ci 1985bf215546Sopenharmony_ci shader.coordinate_shader_code_address = 1986bf215546Sopenharmony_ci v3dv_cl_address(assembly_bo, vs_bin_variant->assembly_offset); 1987bf215546Sopenharmony_ci shader.vertex_shader_code_address = 1988bf215546Sopenharmony_ci v3dv_cl_address(assembly_bo, vs_variant->assembly_offset); 1989bf215546Sopenharmony_ci shader.fragment_shader_code_address = 1990bf215546Sopenharmony_ci v3dv_cl_address(assembly_bo, fs_variant->assembly_offset); 1991bf215546Sopenharmony_ci 1992bf215546Sopenharmony_ci shader.coordinate_shader_uniforms_address = cmd_buffer->state.uniforms.vs_bin; 1993bf215546Sopenharmony_ci shader.vertex_shader_uniforms_address = cmd_buffer->state.uniforms.vs; 1994bf215546Sopenharmony_ci shader.fragment_shader_uniforms_address = cmd_buffer->state.uniforms.fs; 1995bf215546Sopenharmony_ci 1996bf215546Sopenharmony_ci shader.address_of_default_attribute_values = 1997bf215546Sopenharmony_ci v3dv_cl_address(default_attribute_values, 0); 1998bf215546Sopenharmony_ci 1999bf215546Sopenharmony_ci shader.any_shader_reads_hardware_written_primitive_id = 2000bf215546Sopenharmony_ci (pipeline->has_gs && prog_data_gs->uses_pid) || prog_data_fs->uses_pid; 2001bf215546Sopenharmony_ci shader.insert_primitive_id_as_first_varying_to_fragment_shader = 2002bf215546Sopenharmony_ci !pipeline->has_gs && prog_data_fs->uses_pid; 2003bf215546Sopenharmony_ci } 2004bf215546Sopenharmony_ci 2005bf215546Sopenharmony_ci /* Upload vertex element attributes (SHADER_STATE_ATTRIBUTE_RECORD) */ 2006bf215546Sopenharmony_ci bool cs_loaded_any = 
false; 2007bf215546Sopenharmony_ci const bool cs_uses_builtins = prog_data_vs_bin->uses_iid || 2008bf215546Sopenharmony_ci prog_data_vs_bin->uses_biid || 2009bf215546Sopenharmony_ci prog_data_vs_bin->uses_vid; 2010bf215546Sopenharmony_ci const uint32_t packet_length = 2011bf215546Sopenharmony_ci cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD); 2012bf215546Sopenharmony_ci 2013bf215546Sopenharmony_ci uint32_t emitted_va_count = 0; 2014bf215546Sopenharmony_ci for (uint32_t i = 0; emitted_va_count < pipeline->va_count; i++) { 2015bf215546Sopenharmony_ci assert(i < MAX_VERTEX_ATTRIBS); 2016bf215546Sopenharmony_ci 2017bf215546Sopenharmony_ci if (pipeline->va[i].vk_format == VK_FORMAT_UNDEFINED) 2018bf215546Sopenharmony_ci continue; 2019bf215546Sopenharmony_ci 2020bf215546Sopenharmony_ci const uint32_t binding = pipeline->va[i].binding; 2021bf215546Sopenharmony_ci 2022bf215546Sopenharmony_ci /* We store each vertex attribute in the array using its driver location 2023bf215546Sopenharmony_ci * as index. 
2024bf215546Sopenharmony_ci */ 2025bf215546Sopenharmony_ci const uint32_t location = i; 2026bf215546Sopenharmony_ci 2027bf215546Sopenharmony_ci struct v3dv_vertex_binding *c_vb = &cmd_buffer->state.vertex_bindings[binding]; 2028bf215546Sopenharmony_ci 2029bf215546Sopenharmony_ci cl_emit_with_prepacked(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, 2030bf215546Sopenharmony_ci &pipeline->vertex_attrs[i * packet_length], attr) { 2031bf215546Sopenharmony_ci 2032bf215546Sopenharmony_ci assert(c_vb->buffer->mem->bo); 2033bf215546Sopenharmony_ci attr.address = v3dv_cl_address(c_vb->buffer->mem->bo, 2034bf215546Sopenharmony_ci c_vb->buffer->mem_offset + 2035bf215546Sopenharmony_ci pipeline->va[i].offset + 2036bf215546Sopenharmony_ci c_vb->offset); 2037bf215546Sopenharmony_ci 2038bf215546Sopenharmony_ci attr.number_of_values_read_by_coordinate_shader = 2039bf215546Sopenharmony_ci prog_data_vs_bin->vattr_sizes[location]; 2040bf215546Sopenharmony_ci attr.number_of_values_read_by_vertex_shader = 2041bf215546Sopenharmony_ci prog_data_vs->vattr_sizes[location]; 2042bf215546Sopenharmony_ci 2043bf215546Sopenharmony_ci /* GFXH-930: At least one attribute must be enabled and read by CS 2044bf215546Sopenharmony_ci * and VS. If we have attributes being consumed by the VS but not 2045bf215546Sopenharmony_ci * the CS, then set up a dummy load of the last attribute into the 2046bf215546Sopenharmony_ci * CS's VPM inputs. (Since CS is just dead-code-elimination compared 2047bf215546Sopenharmony_ci * to VS, we can't have CS loading but not VS). 2048bf215546Sopenharmony_ci * 2049bf215546Sopenharmony_ci * GFXH-1602: first attribute must be active if using builtins. 
2050bf215546Sopenharmony_ci */ 2051bf215546Sopenharmony_ci if (prog_data_vs_bin->vattr_sizes[location]) 2052bf215546Sopenharmony_ci cs_loaded_any = true; 2053bf215546Sopenharmony_ci 2054bf215546Sopenharmony_ci if (i == 0 && cs_uses_builtins && !cs_loaded_any) { 2055bf215546Sopenharmony_ci attr.number_of_values_read_by_coordinate_shader = 1; 2056bf215546Sopenharmony_ci cs_loaded_any = true; 2057bf215546Sopenharmony_ci } else if (i == pipeline->va_count - 1 && !cs_loaded_any) { 2058bf215546Sopenharmony_ci attr.number_of_values_read_by_coordinate_shader = 1; 2059bf215546Sopenharmony_ci cs_loaded_any = true; 2060bf215546Sopenharmony_ci } 2061bf215546Sopenharmony_ci 2062bf215546Sopenharmony_ci attr.maximum_index = 0xffffff; 2063bf215546Sopenharmony_ci } 2064bf215546Sopenharmony_ci 2065bf215546Sopenharmony_ci emitted_va_count++; 2066bf215546Sopenharmony_ci } 2067bf215546Sopenharmony_ci 2068bf215546Sopenharmony_ci if (pipeline->va_count == 0) { 2069bf215546Sopenharmony_ci /* GFXH-930: At least one attribute must be enabled and read 2070bf215546Sopenharmony_ci * by CS and VS. If we have no attributes being consumed by 2071bf215546Sopenharmony_ci * the shader, set up a dummy to be loaded into the VPM. 2072bf215546Sopenharmony_ci */ 2073bf215546Sopenharmony_ci cl_emit(&job->indirect, GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) { 2074bf215546Sopenharmony_ci /* Valid address of data whose value will be unused. 
*/ 2075bf215546Sopenharmony_ci attr.address = v3dv_cl_address(job->indirect.bo, 0); 2076bf215546Sopenharmony_ci 2077bf215546Sopenharmony_ci attr.type = ATTRIBUTE_FLOAT; 2078bf215546Sopenharmony_ci attr.stride = 0; 2079bf215546Sopenharmony_ci attr.vec_size = 1; 2080bf215546Sopenharmony_ci 2081bf215546Sopenharmony_ci attr.number_of_values_read_by_coordinate_shader = 1; 2082bf215546Sopenharmony_ci attr.number_of_values_read_by_vertex_shader = 1; 2083bf215546Sopenharmony_ci } 2084bf215546Sopenharmony_ci } 2085bf215546Sopenharmony_ci 2086bf215546Sopenharmony_ci if (cmd_buffer->state.dirty & V3DV_CMD_DIRTY_PIPELINE) { 2087bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, 2088bf215546Sopenharmony_ci sizeof(pipeline->vcm_cache_size)); 2089bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2090bf215546Sopenharmony_ci 2091bf215546Sopenharmony_ci cl_emit_prepacked(&job->bcl, &pipeline->vcm_cache_size); 2092bf215546Sopenharmony_ci } 2093bf215546Sopenharmony_ci 2094bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch(&job->bcl, 2095bf215546Sopenharmony_ci cl_packet_length(GL_SHADER_STATE)); 2096bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2097bf215546Sopenharmony_ci 2098bf215546Sopenharmony_ci if (pipeline->has_gs) { 2099bf215546Sopenharmony_ci cl_emit(&job->bcl, GL_SHADER_STATE_INCLUDING_GS, state) { 2100bf215546Sopenharmony_ci state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset); 2101bf215546Sopenharmony_ci state.number_of_attribute_arrays = num_elements_to_emit; 2102bf215546Sopenharmony_ci } 2103bf215546Sopenharmony_ci } else { 2104bf215546Sopenharmony_ci cl_emit(&job->bcl, GL_SHADER_STATE, state) { 2105bf215546Sopenharmony_ci state.address = v3dv_cl_address(job->indirect.bo, shader_rec_offset); 2106bf215546Sopenharmony_ci state.number_of_attribute_arrays = num_elements_to_emit; 2107bf215546Sopenharmony_ci } 2108bf215546Sopenharmony_ci } 2109bf215546Sopenharmony_ci 2110bf215546Sopenharmony_ci /* Clearing push 
constants and descriptor sets for all stages is not quite 2111bf215546Sopenharmony_ci * correct (some shader stages may not be used at all or they may not be 2112bf215546Sopenharmony_ci * consuming push constants), however this is not relevant because if we 2113bf215546Sopenharmony_ci * bind a different pipeline we always have to rebuild the uniform streams. 2114bf215546Sopenharmony_ci */ 2115bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~(V3DV_CMD_DIRTY_VERTEX_BUFFER | 2116bf215546Sopenharmony_ci V3DV_CMD_DIRTY_DESCRIPTOR_SETS | 2117bf215546Sopenharmony_ci V3DV_CMD_DIRTY_PUSH_CONSTANTS); 2118bf215546Sopenharmony_ci cmd_buffer->state.dirty_descriptor_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS; 2119bf215546Sopenharmony_ci cmd_buffer->state.dirty_push_constants_stages &= ~VK_SHADER_STAGE_ALL_GRAPHICS; 2120bf215546Sopenharmony_ci} 2121bf215546Sopenharmony_ci 2122bf215546Sopenharmony_civoid 2123bf215546Sopenharmony_civ3dX(cmd_buffer_emit_draw)(struct v3dv_cmd_buffer *cmd_buffer, 2124bf215546Sopenharmony_ci struct v3dv_draw_info *info) 2125bf215546Sopenharmony_ci{ 2126bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 2127bf215546Sopenharmony_ci assert(job); 2128bf215546Sopenharmony_ci 2129bf215546Sopenharmony_ci struct v3dv_cmd_buffer_state *state = &cmd_buffer->state; 2130bf215546Sopenharmony_ci struct v3dv_pipeline *pipeline = state->gfx.pipeline; 2131bf215546Sopenharmony_ci 2132bf215546Sopenharmony_ci assert(pipeline); 2133bf215546Sopenharmony_ci 2134bf215546Sopenharmony_ci uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); 2135bf215546Sopenharmony_ci 2136bf215546Sopenharmony_ci if (info->first_instance > 0) { 2137bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 2138bf215546Sopenharmony_ci &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE)); 2139bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_ci cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { 
2142bf215546Sopenharmony_ci base.base_instance = info->first_instance; 2143bf215546Sopenharmony_ci base.base_vertex = 0; 2144bf215546Sopenharmony_ci } 2145bf215546Sopenharmony_ci } 2146bf215546Sopenharmony_ci 2147bf215546Sopenharmony_ci if (info->instance_count > 1) { 2148bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 2149bf215546Sopenharmony_ci &job->bcl, cl_packet_length(VERTEX_ARRAY_INSTANCED_PRIMS)); 2150bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2151bf215546Sopenharmony_ci 2152bf215546Sopenharmony_ci cl_emit(&job->bcl, VERTEX_ARRAY_INSTANCED_PRIMS, prim) { 2153bf215546Sopenharmony_ci prim.mode = hw_prim_type; 2154bf215546Sopenharmony_ci prim.index_of_first_vertex = info->first_vertex; 2155bf215546Sopenharmony_ci prim.number_of_instances = info->instance_count; 2156bf215546Sopenharmony_ci prim.instance_length = info->vertex_count; 2157bf215546Sopenharmony_ci } 2158bf215546Sopenharmony_ci } else { 2159bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 2160bf215546Sopenharmony_ci &job->bcl, cl_packet_length(VERTEX_ARRAY_PRIMS)); 2161bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2162bf215546Sopenharmony_ci cl_emit(&job->bcl, VERTEX_ARRAY_PRIMS, prim) { 2163bf215546Sopenharmony_ci prim.mode = hw_prim_type; 2164bf215546Sopenharmony_ci prim.length = info->vertex_count; 2165bf215546Sopenharmony_ci prim.index_of_first_vertex = info->first_vertex; 2166bf215546Sopenharmony_ci } 2167bf215546Sopenharmony_ci } 2168bf215546Sopenharmony_ci} 2169bf215546Sopenharmony_ci 2170bf215546Sopenharmony_civoid 2171bf215546Sopenharmony_civ3dX(cmd_buffer_emit_index_buffer)(struct v3dv_cmd_buffer *cmd_buffer) 2172bf215546Sopenharmony_ci{ 2173bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 2174bf215546Sopenharmony_ci assert(job); 2175bf215546Sopenharmony_ci 2176bf215546Sopenharmony_ci /* We flag all state as dirty when we create a new job so make sure we 2177bf215546Sopenharmony_ci * have a valid index buffer before 
attempting to emit state for it. 2178bf215546Sopenharmony_ci */ 2179bf215546Sopenharmony_ci struct v3dv_buffer *ibuffer = 2180bf215546Sopenharmony_ci v3dv_buffer_from_handle(cmd_buffer->state.index_buffer.buffer); 2181bf215546Sopenharmony_ci if (ibuffer) { 2182bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 2183bf215546Sopenharmony_ci &job->bcl, cl_packet_length(INDEX_BUFFER_SETUP)); 2184bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2185bf215546Sopenharmony_ci 2186bf215546Sopenharmony_ci const uint32_t offset = cmd_buffer->state.index_buffer.offset; 2187bf215546Sopenharmony_ci cl_emit(&job->bcl, INDEX_BUFFER_SETUP, ib) { 2188bf215546Sopenharmony_ci ib.address = v3dv_cl_address(ibuffer->mem->bo, 2189bf215546Sopenharmony_ci ibuffer->mem_offset + offset); 2190bf215546Sopenharmony_ci ib.size = ibuffer->mem->bo->size; 2191bf215546Sopenharmony_ci } 2192bf215546Sopenharmony_ci } 2193bf215546Sopenharmony_ci 2194bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~V3DV_CMD_DIRTY_INDEX_BUFFER; 2195bf215546Sopenharmony_ci} 2196bf215546Sopenharmony_ci 2197bf215546Sopenharmony_civoid 2198bf215546Sopenharmony_civ3dX(cmd_buffer_emit_draw_indexed)(struct v3dv_cmd_buffer *cmd_buffer, 2199bf215546Sopenharmony_ci uint32_t indexCount, 2200bf215546Sopenharmony_ci uint32_t instanceCount, 2201bf215546Sopenharmony_ci uint32_t firstIndex, 2202bf215546Sopenharmony_ci int32_t vertexOffset, 2203bf215546Sopenharmony_ci uint32_t firstInstance) 2204bf215546Sopenharmony_ci{ 2205bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 2206bf215546Sopenharmony_ci assert(job); 2207bf215546Sopenharmony_ci 2208bf215546Sopenharmony_ci const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 2209bf215546Sopenharmony_ci uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); 2210bf215546Sopenharmony_ci uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1; 2211bf215546Sopenharmony_ci uint32_t index_offset = firstIndex * 
cmd_buffer->state.index_buffer.index_size; 2212bf215546Sopenharmony_ci 2213bf215546Sopenharmony_ci if (vertexOffset != 0 || firstInstance != 0) { 2214bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 2215bf215546Sopenharmony_ci &job->bcl, cl_packet_length(BASE_VERTEX_BASE_INSTANCE)); 2216bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2217bf215546Sopenharmony_ci 2218bf215546Sopenharmony_ci cl_emit(&job->bcl, BASE_VERTEX_BASE_INSTANCE, base) { 2219bf215546Sopenharmony_ci base.base_instance = firstInstance; 2220bf215546Sopenharmony_ci base.base_vertex = vertexOffset; 2221bf215546Sopenharmony_ci } 2222bf215546Sopenharmony_ci } 2223bf215546Sopenharmony_ci 2224bf215546Sopenharmony_ci if (instanceCount == 1) { 2225bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 2226bf215546Sopenharmony_ci &job->bcl, cl_packet_length(INDEXED_PRIM_LIST)); 2227bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2228bf215546Sopenharmony_ci 2229bf215546Sopenharmony_ci cl_emit(&job->bcl, INDEXED_PRIM_LIST, prim) { 2230bf215546Sopenharmony_ci prim.index_type = index_type; 2231bf215546Sopenharmony_ci prim.length = indexCount; 2232bf215546Sopenharmony_ci prim.index_offset = index_offset; 2233bf215546Sopenharmony_ci prim.mode = hw_prim_type; 2234bf215546Sopenharmony_ci prim.enable_primitive_restarts = pipeline->primitive_restart; 2235bf215546Sopenharmony_ci } 2236bf215546Sopenharmony_ci } else if (instanceCount > 1) { 2237bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 2238bf215546Sopenharmony_ci &job->bcl, cl_packet_length(INDEXED_INSTANCED_PRIM_LIST)); 2239bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2240bf215546Sopenharmony_ci 2241bf215546Sopenharmony_ci cl_emit(&job->bcl, INDEXED_INSTANCED_PRIM_LIST, prim) { 2242bf215546Sopenharmony_ci prim.index_type = index_type; 2243bf215546Sopenharmony_ci prim.index_offset = index_offset; 2244bf215546Sopenharmony_ci prim.mode = hw_prim_type; 2245bf215546Sopenharmony_ci 
prim.enable_primitive_restarts = pipeline->primitive_restart; 2246bf215546Sopenharmony_ci prim.number_of_instances = instanceCount; 2247bf215546Sopenharmony_ci prim.instance_length = indexCount; 2248bf215546Sopenharmony_ci } 2249bf215546Sopenharmony_ci } 2250bf215546Sopenharmony_ci} 2251bf215546Sopenharmony_ci 2252bf215546Sopenharmony_civoid 2253bf215546Sopenharmony_civ3dX(cmd_buffer_emit_draw_indirect)(struct v3dv_cmd_buffer *cmd_buffer, 2254bf215546Sopenharmony_ci struct v3dv_buffer *buffer, 2255bf215546Sopenharmony_ci VkDeviceSize offset, 2256bf215546Sopenharmony_ci uint32_t drawCount, 2257bf215546Sopenharmony_ci uint32_t stride) 2258bf215546Sopenharmony_ci{ 2259bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 2260bf215546Sopenharmony_ci assert(job); 2261bf215546Sopenharmony_ci 2262bf215546Sopenharmony_ci const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 2263bf215546Sopenharmony_ci uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); 2264bf215546Sopenharmony_ci 2265bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 2266bf215546Sopenharmony_ci &job->bcl, cl_packet_length(INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS)); 2267bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2268bf215546Sopenharmony_ci 2269bf215546Sopenharmony_ci cl_emit(&job->bcl, INDIRECT_VERTEX_ARRAY_INSTANCED_PRIMS, prim) { 2270bf215546Sopenharmony_ci prim.mode = hw_prim_type; 2271bf215546Sopenharmony_ci prim.number_of_draw_indirect_array_records = drawCount; 2272bf215546Sopenharmony_ci prim.stride_in_multiples_of_4_bytes = stride >> 2; 2273bf215546Sopenharmony_ci prim.address = v3dv_cl_address(buffer->mem->bo, 2274bf215546Sopenharmony_ci buffer->mem_offset + offset); 2275bf215546Sopenharmony_ci } 2276bf215546Sopenharmony_ci} 2277bf215546Sopenharmony_ci 2278bf215546Sopenharmony_civoid 2279bf215546Sopenharmony_civ3dX(cmd_buffer_emit_indexed_indirect)(struct v3dv_cmd_buffer *cmd_buffer, 2280bf215546Sopenharmony_ci struct v3dv_buffer 
*buffer, 2281bf215546Sopenharmony_ci VkDeviceSize offset, 2282bf215546Sopenharmony_ci uint32_t drawCount, 2283bf215546Sopenharmony_ci uint32_t stride) 2284bf215546Sopenharmony_ci{ 2285bf215546Sopenharmony_ci struct v3dv_job *job = cmd_buffer->state.job; 2286bf215546Sopenharmony_ci assert(job); 2287bf215546Sopenharmony_ci 2288bf215546Sopenharmony_ci const struct v3dv_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 2289bf215546Sopenharmony_ci uint32_t hw_prim_type = v3d_hw_prim_type(pipeline->topology); 2290bf215546Sopenharmony_ci uint8_t index_type = ffs(cmd_buffer->state.index_buffer.index_size) - 1; 2291bf215546Sopenharmony_ci 2292bf215546Sopenharmony_ci v3dv_cl_ensure_space_with_branch( 2293bf215546Sopenharmony_ci &job->bcl, cl_packet_length(INDIRECT_INDEXED_INSTANCED_PRIM_LIST)); 2294bf215546Sopenharmony_ci v3dv_return_if_oom(cmd_buffer, NULL); 2295bf215546Sopenharmony_ci 2296bf215546Sopenharmony_ci cl_emit(&job->bcl, INDIRECT_INDEXED_INSTANCED_PRIM_LIST, prim) { 2297bf215546Sopenharmony_ci prim.index_type = index_type; 2298bf215546Sopenharmony_ci prim.mode = hw_prim_type; 2299bf215546Sopenharmony_ci prim.enable_primitive_restarts = pipeline->primitive_restart; 2300bf215546Sopenharmony_ci prim.number_of_draw_indirect_indexed_records = drawCount; 2301bf215546Sopenharmony_ci prim.stride_in_multiples_of_4_bytes = stride >> 2; 2302bf215546Sopenharmony_ci prim.address = v3dv_cl_address(buffer->mem->bo, 2303bf215546Sopenharmony_ci buffer->mem_offset + offset); 2304bf215546Sopenharmony_ci } 2305bf215546Sopenharmony_ci} 2306bf215546Sopenharmony_ci 2307bf215546Sopenharmony_civoid 2308bf215546Sopenharmony_civ3dX(cmd_buffer_render_pass_setup_render_target)(struct v3dv_cmd_buffer *cmd_buffer, 2309bf215546Sopenharmony_ci int rt, 2310bf215546Sopenharmony_ci uint32_t *rt_bpp, 2311bf215546Sopenharmony_ci uint32_t *rt_type, 2312bf215546Sopenharmony_ci uint32_t *rt_clamp) 2313bf215546Sopenharmony_ci{ 2314bf215546Sopenharmony_ci const struct v3dv_cmd_buffer_state *state = 
&cmd_buffer->state; 2315bf215546Sopenharmony_ci 2316bf215546Sopenharmony_ci assert(state->subpass_idx < state->pass->subpass_count); 2317bf215546Sopenharmony_ci const struct v3dv_subpass *subpass = 2318bf215546Sopenharmony_ci &state->pass->subpasses[state->subpass_idx]; 2319bf215546Sopenharmony_ci 2320bf215546Sopenharmony_ci if (rt >= subpass->color_count) 2321bf215546Sopenharmony_ci return; 2322bf215546Sopenharmony_ci 2323bf215546Sopenharmony_ci struct v3dv_subpass_attachment *attachment = &subpass->color_attachments[rt]; 2324bf215546Sopenharmony_ci const uint32_t attachment_idx = attachment->attachment; 2325bf215546Sopenharmony_ci if (attachment_idx == VK_ATTACHMENT_UNUSED) 2326bf215546Sopenharmony_ci return; 2327bf215546Sopenharmony_ci 2328bf215546Sopenharmony_ci assert(attachment_idx < state->framebuffer->attachment_count && 2329bf215546Sopenharmony_ci attachment_idx < state->attachment_alloc_count); 2330bf215546Sopenharmony_ci struct v3dv_image_view *iview = state->attachments[attachment_idx].image_view; 2331bf215546Sopenharmony_ci assert(vk_format_is_color(iview->vk.format)); 2332bf215546Sopenharmony_ci 2333bf215546Sopenharmony_ci *rt_bpp = iview->internal_bpp; 2334bf215546Sopenharmony_ci *rt_type = iview->internal_type; 2335bf215546Sopenharmony_ci if (vk_format_is_int(iview->vk.view_format)) 2336bf215546Sopenharmony_ci *rt_clamp = V3D_RENDER_TARGET_CLAMP_INT; 2337bf215546Sopenharmony_ci else if (vk_format_is_srgb(iview->vk.view_format)) 2338bf215546Sopenharmony_ci *rt_clamp = V3D_RENDER_TARGET_CLAMP_NORM; 2339bf215546Sopenharmony_ci else 2340bf215546Sopenharmony_ci *rt_clamp = V3D_RENDER_TARGET_CLAMP_NONE; 2341bf215546Sopenharmony_ci} 2342