1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2016 Red Hat. 3bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * based in part on anv driver which is: 6bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 7bf215546Sopenharmony_ci * 8bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 9bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 10bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 11bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 13bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 16bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 17bf215546Sopenharmony_ci * Software. 18bf215546Sopenharmony_ci * 19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25bf215546Sopenharmony_ci * IN THE SOFTWARE. 
26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "radv_cs.h" 29bf215546Sopenharmony_ci#include "radv_debug.h" 30bf215546Sopenharmony_ci#include "radv_meta.h" 31bf215546Sopenharmony_ci#include "radv_private.h" 32bf215546Sopenharmony_ci#include "radv_radeon_winsys.h" 33bf215546Sopenharmony_ci#include "radv_shader.h" 34bf215546Sopenharmony_ci#include "sid.h" 35bf215546Sopenharmony_ci#include "vk_format.h" 36bf215546Sopenharmony_ci#include "vk_util.h" 37bf215546Sopenharmony_ci#include "vk_enum_defines.h" 38bf215546Sopenharmony_ci#include "vk_common_entrypoints.h" 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci#include "ac_debug.h" 41bf215546Sopenharmony_ci#include "ac_shader_args.h" 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci#include "util/fast_idiv_by_const.h" 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_cienum { 46bf215546Sopenharmony_ci RADV_PREFETCH_VBO_DESCRIPTORS = (1 << 0), 47bf215546Sopenharmony_ci RADV_PREFETCH_VS = (1 << 1), 48bf215546Sopenharmony_ci RADV_PREFETCH_TCS = (1 << 2), 49bf215546Sopenharmony_ci RADV_PREFETCH_TES = (1 << 3), 50bf215546Sopenharmony_ci RADV_PREFETCH_GS = (1 << 4), 51bf215546Sopenharmony_ci RADV_PREFETCH_PS = (1 << 5), 52bf215546Sopenharmony_ci RADV_PREFETCH_MS = (1 << 6), 53bf215546Sopenharmony_ci RADV_PREFETCH_SHADERS = (RADV_PREFETCH_VS | RADV_PREFETCH_TCS | RADV_PREFETCH_TES | 54bf215546Sopenharmony_ci RADV_PREFETCH_GS | RADV_PREFETCH_PS | RADV_PREFETCH_MS) 55bf215546Sopenharmony_ci}; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_cistatic void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, 58bf215546Sopenharmony_ci struct radv_image *image, VkImageLayout src_layout, 59bf215546Sopenharmony_ci bool src_render_loop, VkImageLayout dst_layout, 60bf215546Sopenharmony_ci bool dst_render_loop, uint32_t src_family_index, 61bf215546Sopenharmony_ci uint32_t dst_family_index, const VkImageSubresourceRange *range, 62bf215546Sopenharmony_ci struct 
radv_sample_locations_state *sample_locs); 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_cistatic void radv_set_rt_stack_size(struct radv_cmd_buffer *cmd_buffer, uint32_t size); 65bf215546Sopenharmony_ci 66bf215546Sopenharmony_ciconst struct radv_dynamic_state default_dynamic_state = { 67bf215546Sopenharmony_ci .viewport = 68bf215546Sopenharmony_ci { 69bf215546Sopenharmony_ci .count = 0, 70bf215546Sopenharmony_ci }, 71bf215546Sopenharmony_ci .scissor = 72bf215546Sopenharmony_ci { 73bf215546Sopenharmony_ci .count = 0, 74bf215546Sopenharmony_ci }, 75bf215546Sopenharmony_ci .line_width = 1.0f, 76bf215546Sopenharmony_ci .depth_bias = 77bf215546Sopenharmony_ci { 78bf215546Sopenharmony_ci .bias = 0.0f, 79bf215546Sopenharmony_ci .clamp = 0.0f, 80bf215546Sopenharmony_ci .slope = 0.0f, 81bf215546Sopenharmony_ci }, 82bf215546Sopenharmony_ci .blend_constants = {0.0f, 0.0f, 0.0f, 0.0f}, 83bf215546Sopenharmony_ci .depth_bounds = 84bf215546Sopenharmony_ci { 85bf215546Sopenharmony_ci .min = 0.0f, 86bf215546Sopenharmony_ci .max = 1.0f, 87bf215546Sopenharmony_ci }, 88bf215546Sopenharmony_ci .stencil_compare_mask = 89bf215546Sopenharmony_ci { 90bf215546Sopenharmony_ci .front = ~0u, 91bf215546Sopenharmony_ci .back = ~0u, 92bf215546Sopenharmony_ci }, 93bf215546Sopenharmony_ci .stencil_write_mask = 94bf215546Sopenharmony_ci { 95bf215546Sopenharmony_ci .front = ~0u, 96bf215546Sopenharmony_ci .back = ~0u, 97bf215546Sopenharmony_ci }, 98bf215546Sopenharmony_ci .stencil_reference = 99bf215546Sopenharmony_ci { 100bf215546Sopenharmony_ci .front = 0u, 101bf215546Sopenharmony_ci .back = 0u, 102bf215546Sopenharmony_ci }, 103bf215546Sopenharmony_ci .line_stipple = 104bf215546Sopenharmony_ci { 105bf215546Sopenharmony_ci .factor = 0u, 106bf215546Sopenharmony_ci .pattern = 0u, 107bf215546Sopenharmony_ci }, 108bf215546Sopenharmony_ci .cull_mode = 0u, 109bf215546Sopenharmony_ci .front_face = 0u, 110bf215546Sopenharmony_ci .primitive_topology = 0u, 111bf215546Sopenharmony_ci .fragment_shading_rate 
= 112bf215546Sopenharmony_ci { 113bf215546Sopenharmony_ci .size = {1u, 1u}, 114bf215546Sopenharmony_ci .combiner_ops = {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR, 115bf215546Sopenharmony_ci VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR}, 116bf215546Sopenharmony_ci }, 117bf215546Sopenharmony_ci .depth_bias_enable = 0u, 118bf215546Sopenharmony_ci .primitive_restart_enable = 0u, 119bf215546Sopenharmony_ci .rasterizer_discard_enable = 0u, 120bf215546Sopenharmony_ci .logic_op = 0u, 121bf215546Sopenharmony_ci .color_write_enable = 0xffffffffu, 122bf215546Sopenharmony_ci}; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_cistatic void 125bf215546Sopenharmony_ciradv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dynamic_state *src) 126bf215546Sopenharmony_ci{ 127bf215546Sopenharmony_ci struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic; 128bf215546Sopenharmony_ci uint64_t copy_mask = src->mask; 129bf215546Sopenharmony_ci uint64_t dest_mask = 0; 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci dest->discard_rectangle.count = src->discard_rectangle.count; 132bf215546Sopenharmony_ci dest->sample_location.count = src->sample_location.count; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_VIEWPORT) { 135bf215546Sopenharmony_ci if (dest->viewport.count != src->viewport.count) { 136bf215546Sopenharmony_ci dest->viewport.count = src->viewport.count; 137bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_VIEWPORT; 138bf215546Sopenharmony_ci } 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci if (memcmp(&dest->viewport.viewports, &src->viewport.viewports, 141bf215546Sopenharmony_ci src->viewport.count * sizeof(VkViewport))) { 142bf215546Sopenharmony_ci typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count); 143bf215546Sopenharmony_ci typed_memcpy(dest->viewport.xform, src->viewport.xform, src->viewport.count); 144bf215546Sopenharmony_ci dest_mask |= 
RADV_DYNAMIC_VIEWPORT; 145bf215546Sopenharmony_ci } 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_SCISSOR) { 149bf215546Sopenharmony_ci if (dest->scissor.count != src->scissor.count) { 150bf215546Sopenharmony_ci dest->scissor.count = src->scissor.count; 151bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_SCISSOR; 152bf215546Sopenharmony_ci } 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci if (memcmp(&dest->scissor.scissors, &src->scissor.scissors, 155bf215546Sopenharmony_ci src->scissor.count * sizeof(VkRect2D))) { 156bf215546Sopenharmony_ci typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count); 157bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_SCISSOR; 158bf215546Sopenharmony_ci } 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) { 162bf215546Sopenharmony_ci if (dest->line_width != src->line_width) { 163bf215546Sopenharmony_ci dest->line_width = src->line_width; 164bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_LINE_WIDTH; 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci } 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) { 169bf215546Sopenharmony_ci if (memcmp(&dest->depth_bias, &src->depth_bias, sizeof(src->depth_bias))) { 170bf215546Sopenharmony_ci dest->depth_bias = src->depth_bias; 171bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_DEPTH_BIAS; 172bf215546Sopenharmony_ci } 173bf215546Sopenharmony_ci } 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) { 176bf215546Sopenharmony_ci if (memcmp(&dest->blend_constants, &src->blend_constants, sizeof(src->blend_constants))) { 177bf215546Sopenharmony_ci typed_memcpy(dest->blend_constants, src->blend_constants, 4); 178bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS; 179bf215546Sopenharmony_ci } 180bf215546Sopenharmony_ci } 
181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) { 183bf215546Sopenharmony_ci if (memcmp(&dest->depth_bounds, &src->depth_bounds, sizeof(src->depth_bounds))) { 184bf215546Sopenharmony_ci dest->depth_bounds = src->depth_bounds; 185bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS; 186bf215546Sopenharmony_ci } 187bf215546Sopenharmony_ci } 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) { 190bf215546Sopenharmony_ci if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask, 191bf215546Sopenharmony_ci sizeof(src->stencil_compare_mask))) { 192bf215546Sopenharmony_ci dest->stencil_compare_mask = src->stencil_compare_mask; 193bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK; 194bf215546Sopenharmony_ci } 195bf215546Sopenharmony_ci } 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) { 198bf215546Sopenharmony_ci if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask, 199bf215546Sopenharmony_ci sizeof(src->stencil_write_mask))) { 200bf215546Sopenharmony_ci dest->stencil_write_mask = src->stencil_write_mask; 201bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK; 202bf215546Sopenharmony_ci } 203bf215546Sopenharmony_ci } 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) { 206bf215546Sopenharmony_ci if (memcmp(&dest->stencil_reference, &src->stencil_reference, 207bf215546Sopenharmony_ci sizeof(src->stencil_reference))) { 208bf215546Sopenharmony_ci dest->stencil_reference = src->stencil_reference; 209bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE; 210bf215546Sopenharmony_ci } 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) { 214bf215546Sopenharmony_ci if (memcmp(&dest->discard_rectangle.rectangles, 
&src->discard_rectangle.rectangles, 215bf215546Sopenharmony_ci src->discard_rectangle.count * sizeof(VkRect2D))) { 216bf215546Sopenharmony_ci typed_memcpy(dest->discard_rectangle.rectangles, src->discard_rectangle.rectangles, 217bf215546Sopenharmony_ci src->discard_rectangle.count); 218bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE; 219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci } 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) { 223bf215546Sopenharmony_ci if (dest->sample_location.per_pixel != src->sample_location.per_pixel || 224bf215546Sopenharmony_ci dest->sample_location.grid_size.width != src->sample_location.grid_size.width || 225bf215546Sopenharmony_ci dest->sample_location.grid_size.height != src->sample_location.grid_size.height || 226bf215546Sopenharmony_ci memcmp(&dest->sample_location.locations, &src->sample_location.locations, 227bf215546Sopenharmony_ci src->sample_location.count * sizeof(VkSampleLocationEXT))) { 228bf215546Sopenharmony_ci dest->sample_location.per_pixel = src->sample_location.per_pixel; 229bf215546Sopenharmony_ci dest->sample_location.grid_size = src->sample_location.grid_size; 230bf215546Sopenharmony_ci typed_memcpy(dest->sample_location.locations, src->sample_location.locations, 231bf215546Sopenharmony_ci src->sample_location.count); 232bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS; 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_LINE_STIPPLE) { 237bf215546Sopenharmony_ci if (memcmp(&dest->line_stipple, &src->line_stipple, sizeof(src->line_stipple))) { 238bf215546Sopenharmony_ci dest->line_stipple = src->line_stipple; 239bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_LINE_STIPPLE; 240bf215546Sopenharmony_ci } 241bf215546Sopenharmony_ci } 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_CULL_MODE) { 
244bf215546Sopenharmony_ci if (dest->cull_mode != src->cull_mode) { 245bf215546Sopenharmony_ci dest->cull_mode = src->cull_mode; 246bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_CULL_MODE; 247bf215546Sopenharmony_ci } 248bf215546Sopenharmony_ci } 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_FRONT_FACE) { 251bf215546Sopenharmony_ci if (dest->front_face != src->front_face) { 252bf215546Sopenharmony_ci dest->front_face = src->front_face; 253bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_FRONT_FACE; 254bf215546Sopenharmony_ci } 255bf215546Sopenharmony_ci } 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) { 258bf215546Sopenharmony_ci if (dest->primitive_topology != src->primitive_topology) { 259bf215546Sopenharmony_ci dest->primitive_topology = src->primitive_topology; 260bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY; 261bf215546Sopenharmony_ci } 262bf215546Sopenharmony_ci } 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_DEPTH_TEST_ENABLE) { 265bf215546Sopenharmony_ci if (dest->depth_test_enable != src->depth_test_enable) { 266bf215546Sopenharmony_ci dest->depth_test_enable = src->depth_test_enable; 267bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_DEPTH_TEST_ENABLE; 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci } 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) { 272bf215546Sopenharmony_ci if (dest->depth_write_enable != src->depth_write_enable) { 273bf215546Sopenharmony_ci dest->depth_write_enable = src->depth_write_enable; 274bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_DEPTH_WRITE_ENABLE; 275bf215546Sopenharmony_ci } 276bf215546Sopenharmony_ci } 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_DEPTH_COMPARE_OP) { 279bf215546Sopenharmony_ci if (dest->depth_compare_op != src->depth_compare_op) { 
280bf215546Sopenharmony_ci dest->depth_compare_op = src->depth_compare_op; 281bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_DEPTH_COMPARE_OP; 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) { 286bf215546Sopenharmony_ci if (dest->depth_bounds_test_enable != src->depth_bounds_test_enable) { 287bf215546Sopenharmony_ci dest->depth_bounds_test_enable = src->depth_bounds_test_enable; 288bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE; 289bf215546Sopenharmony_ci } 290bf215546Sopenharmony_ci } 291bf215546Sopenharmony_ci 292bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_STENCIL_TEST_ENABLE) { 293bf215546Sopenharmony_ci if (dest->stencil_test_enable != src->stencil_test_enable) { 294bf215546Sopenharmony_ci dest->stencil_test_enable = src->stencil_test_enable; 295bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_STENCIL_TEST_ENABLE; 296bf215546Sopenharmony_ci } 297bf215546Sopenharmony_ci } 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_STENCIL_OP) { 300bf215546Sopenharmony_ci if (memcmp(&dest->stencil_op, &src->stencil_op, sizeof(src->stencil_op))) { 301bf215546Sopenharmony_ci dest->stencil_op = src->stencil_op; 302bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_STENCIL_OP; 303bf215546Sopenharmony_ci } 304bf215546Sopenharmony_ci } 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) { 307bf215546Sopenharmony_ci if (memcmp(&dest->fragment_shading_rate, &src->fragment_shading_rate, 308bf215546Sopenharmony_ci sizeof(src->fragment_shading_rate))) { 309bf215546Sopenharmony_ci dest->fragment_shading_rate = src->fragment_shading_rate; 310bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE; 311bf215546Sopenharmony_ci } 312bf215546Sopenharmony_ci } 313bf215546Sopenharmony_ci 314bf215546Sopenharmony_ci if (copy_mask & 
RADV_DYNAMIC_DEPTH_BIAS_ENABLE) { 315bf215546Sopenharmony_ci if (dest->depth_bias_enable != src->depth_bias_enable) { 316bf215546Sopenharmony_ci dest->depth_bias_enable = src->depth_bias_enable; 317bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_DEPTH_BIAS_ENABLE; 318bf215546Sopenharmony_ci } 319bf215546Sopenharmony_ci } 320bf215546Sopenharmony_ci 321bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE) { 322bf215546Sopenharmony_ci if (dest->primitive_restart_enable != src->primitive_restart_enable) { 323bf215546Sopenharmony_ci dest->primitive_restart_enable = src->primitive_restart_enable; 324bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE; 325bf215546Sopenharmony_ci } 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE) { 329bf215546Sopenharmony_ci if (dest->rasterizer_discard_enable != src->rasterizer_discard_enable) { 330bf215546Sopenharmony_ci dest->rasterizer_discard_enable = src->rasterizer_discard_enable; 331bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE; 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci } 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_LOGIC_OP) { 336bf215546Sopenharmony_ci if (dest->logic_op != src->logic_op) { 337bf215546Sopenharmony_ci dest->logic_op = src->logic_op; 338bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_LOGIC_OP; 339bf215546Sopenharmony_ci } 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci if (copy_mask & RADV_DYNAMIC_COLOR_WRITE_ENABLE) { 343bf215546Sopenharmony_ci if (dest->color_write_enable != src->color_write_enable) { 344bf215546Sopenharmony_ci dest->color_write_enable = src->color_write_enable; 345bf215546Sopenharmony_ci dest_mask |= RADV_DYNAMIC_COLOR_WRITE_ENABLE; 346bf215546Sopenharmony_ci } 347bf215546Sopenharmony_ci } 348bf215546Sopenharmony_ci 
349bf215546Sopenharmony_ci cmd_buffer->state.dirty |= dest_mask; 350bf215546Sopenharmony_ci} 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_cibool 353bf215546Sopenharmony_ciradv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer) 354bf215546Sopenharmony_ci{ 355bf215546Sopenharmony_ci return cmd_buffer->qf == RADV_QUEUE_COMPUTE && 356bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7; 357bf215546Sopenharmony_ci} 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_cienum amd_ip_type 360bf215546Sopenharmony_ciradv_queue_family_to_ring(struct radv_physical_device *physical_device, 361bf215546Sopenharmony_ci enum radv_queue_family f) 362bf215546Sopenharmony_ci{ 363bf215546Sopenharmony_ci switch (f) { 364bf215546Sopenharmony_ci case RADV_QUEUE_GENERAL: 365bf215546Sopenharmony_ci return AMD_IP_GFX; 366bf215546Sopenharmony_ci case RADV_QUEUE_COMPUTE: 367bf215546Sopenharmony_ci return AMD_IP_COMPUTE; 368bf215546Sopenharmony_ci case RADV_QUEUE_TRANSFER: 369bf215546Sopenharmony_ci return AMD_IP_SDMA; 370bf215546Sopenharmony_ci default: 371bf215546Sopenharmony_ci unreachable("Unknown queue family"); 372bf215546Sopenharmony_ci } 373bf215546Sopenharmony_ci} 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_cistatic void 376bf215546Sopenharmony_ciradv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, 377bf215546Sopenharmony_ci unsigned count, const uint32_t *data) 378bf215546Sopenharmony_ci{ 379bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci radeon_check_space(cmd_buffer->device->ws, cs, 4 + count); 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); 384bf215546Sopenharmony_ci radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel)); 385bf215546Sopenharmony_ci radeon_emit(cs, va); 386bf215546Sopenharmony_ci radeon_emit(cs, va >> 
32); 387bf215546Sopenharmony_ci radeon_emit_array(cs, data, count); 388bf215546Sopenharmony_ci} 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_cistatic void 391bf215546Sopenharmony_ciradv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va, 392bf215546Sopenharmony_ci unsigned size) 393bf215546Sopenharmony_ci{ 394bf215546Sopenharmony_ci uint32_t *zeroes = alloca(size); 395bf215546Sopenharmony_ci memset(zeroes, 0, size); 396bf215546Sopenharmony_ci radv_emit_write_data_packet(cmd_buffer, engine_sel, va, size / 4, zeroes); 397bf215546Sopenharmony_ci} 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_cistatic void 400bf215546Sopenharmony_ciradv_destroy_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) 401bf215546Sopenharmony_ci{ 402bf215546Sopenharmony_ci list_del(&cmd_buffer->pool_link); 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_ci util_dynarray_fini(&cmd_buffer->cached_vertex_formats); 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_ci list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) 407bf215546Sopenharmony_ci { 408bf215546Sopenharmony_ci cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); 409bf215546Sopenharmony_ci list_del(&up->list); 410bf215546Sopenharmony_ci free(up); 411bf215546Sopenharmony_ci } 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci if (cmd_buffer->upload.upload_bo) 414bf215546Sopenharmony_ci cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo); 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci if (cmd_buffer->state.own_render_pass) { 417bf215546Sopenharmony_ci radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device), 418bf215546Sopenharmony_ci radv_render_pass_to_handle(cmd_buffer->state.pass), NULL); 419bf215546Sopenharmony_ci cmd_buffer->state.own_render_pass = false; 420bf215546Sopenharmony_ci } 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci if (cmd_buffer->cs) 
423bf215546Sopenharmony_ci cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs); 424bf215546Sopenharmony_ci if (cmd_buffer->ace_internal.cs) 425bf215546Sopenharmony_ci cmd_buffer->device->ws->cs_destroy(cmd_buffer->ace_internal.cs); 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { 428bf215546Sopenharmony_ci struct radv_descriptor_set_header *set = &cmd_buffer->descriptors[i].push_set.set; 429bf215546Sopenharmony_ci free(set->mapped_ptr); 430bf215546Sopenharmony_ci if (set->layout) 431bf215546Sopenharmony_ci vk_descriptor_set_layout_unref(&cmd_buffer->device->vk, &set->layout->vk); 432bf215546Sopenharmony_ci vk_object_base_finish(&set->base); 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci vk_object_base_finish(&cmd_buffer->meta_push_descriptors.base); 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci vk_command_buffer_finish(&cmd_buffer->vk); 438bf215546Sopenharmony_ci vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer); 439bf215546Sopenharmony_ci} 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_cistatic VkResult 442bf215546Sopenharmony_ciradv_create_cmd_buffer(struct radv_device *device, struct radv_cmd_pool *pool, 443bf215546Sopenharmony_ci VkCommandBufferLevel level, VkCommandBuffer *pCommandBuffer) 444bf215546Sopenharmony_ci{ 445bf215546Sopenharmony_ci struct radv_cmd_buffer *cmd_buffer; 446bf215546Sopenharmony_ci unsigned ring; 447bf215546Sopenharmony_ci cmd_buffer = vk_zalloc(&pool->vk.alloc, sizeof(*cmd_buffer), 8, 448bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 449bf215546Sopenharmony_ci if (cmd_buffer == NULL) 450bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci VkResult result = 453bf215546Sopenharmony_ci vk_command_buffer_init(&cmd_buffer->vk, &pool->vk, level); 454bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 455bf215546Sopenharmony_ci 
vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer); 456bf215546Sopenharmony_ci return result; 457bf215546Sopenharmony_ci } 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_ci cmd_buffer->device = device; 460bf215546Sopenharmony_ci cmd_buffer->pool = pool; 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); 463bf215546Sopenharmony_ci cmd_buffer->qf = vk_queue_to_radv(device->physical_device, pool->vk.queue_family_index); 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci cmd_buffer->cs = device->ws->cs_create(device->ws, ring); 468bf215546Sopenharmony_ci if (!cmd_buffer->cs) { 469bf215546Sopenharmony_ci radv_destroy_cmd_buffer(cmd_buffer); 470bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 471bf215546Sopenharmony_ci } 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci vk_object_base_init(&device->vk, &cmd_buffer->meta_push_descriptors.base, 474bf215546Sopenharmony_ci VK_OBJECT_TYPE_DESCRIPTOR_SET); 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_ci util_dynarray_init(&cmd_buffer->cached_vertex_formats, NULL); 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX_BIND_POINTS; i++) 479bf215546Sopenharmony_ci vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base, 480bf215546Sopenharmony_ci VK_OBJECT_TYPE_DESCRIPTOR_SET); 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer); 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci list_inithead(&cmd_buffer->upload.list); 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci return VK_SUCCESS; 487bf215546Sopenharmony_ci} 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_cistatic VkResult 490bf215546Sopenharmony_ciradv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer) 
491bf215546Sopenharmony_ci{ 492bf215546Sopenharmony_ci vk_command_buffer_reset(&cmd_buffer->vk); 493bf215546Sopenharmony_ci 494bf215546Sopenharmony_ci cmd_buffer->device->ws->cs_reset(cmd_buffer->cs); 495bf215546Sopenharmony_ci if (cmd_buffer->ace_internal.cs) 496bf215546Sopenharmony_ci cmd_buffer->device->ws->cs_reset(cmd_buffer->ace_internal.cs); 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list) 499bf215546Sopenharmony_ci { 500bf215546Sopenharmony_ci cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo); 501bf215546Sopenharmony_ci list_del(&up->list); 502bf215546Sopenharmony_ci free(up); 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci if (cmd_buffer->state.own_render_pass) { 506bf215546Sopenharmony_ci radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device), 507bf215546Sopenharmony_ci radv_render_pass_to_handle(cmd_buffer->state.pass), NULL); 508bf215546Sopenharmony_ci cmd_buffer->state.own_render_pass = false; 509bf215546Sopenharmony_ci } 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci cmd_buffer->push_constant_stages = 0; 512bf215546Sopenharmony_ci cmd_buffer->scratch_size_per_wave_needed = 0; 513bf215546Sopenharmony_ci cmd_buffer->scratch_waves_wanted = 0; 514bf215546Sopenharmony_ci cmd_buffer->compute_scratch_size_per_wave_needed = 0; 515bf215546Sopenharmony_ci cmd_buffer->compute_scratch_waves_wanted = 0; 516bf215546Sopenharmony_ci cmd_buffer->esgs_ring_size_needed = 0; 517bf215546Sopenharmony_ci cmd_buffer->gsvs_ring_size_needed = 0; 518bf215546Sopenharmony_ci cmd_buffer->tess_rings_needed = false; 519bf215546Sopenharmony_ci cmd_buffer->task_rings_needed = false; 520bf215546Sopenharmony_ci cmd_buffer->mesh_scratch_ring_needed = false; 521bf215546Sopenharmony_ci cmd_buffer->gds_needed = false; 522bf215546Sopenharmony_ci cmd_buffer->gds_oa_needed = false; 523bf215546Sopenharmony_ci 
cmd_buffer->sample_positions_needed = false; 524bf215546Sopenharmony_ci cmd_buffer->ace_internal.sem.gfx2ace_value = 0; 525bf215546Sopenharmony_ci cmd_buffer->ace_internal.sem.emitted_gfx2ace_value = 0; 526bf215546Sopenharmony_ci cmd_buffer->ace_internal.sem.va = 0; 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci if (cmd_buffer->upload.upload_bo) 529bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo); 530bf215546Sopenharmony_ci cmd_buffer->upload.offset = 0; 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_ci cmd_buffer->record_result = VK_SUCCESS; 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci memset(cmd_buffer->vertex_binding_buffers, 0, sizeof(struct radv_buffer *) * cmd_buffer->used_vertex_bindings); 535bf215546Sopenharmony_ci cmd_buffer->used_vertex_bindings = 0; 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX_BIND_POINTS; i++) { 538bf215546Sopenharmony_ci cmd_buffer->descriptors[i].dirty = 0; 539bf215546Sopenharmony_ci cmd_buffer->descriptors[i].valid = 0; 540bf215546Sopenharmony_ci cmd_buffer->descriptors[i].push_dirty = false; 541bf215546Sopenharmony_ci } 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { 544bf215546Sopenharmony_ci uint32_t pred_value = 0; 545bf215546Sopenharmony_ci uint32_t pred_offset; 546bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset)) 547bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci cmd_buffer->mec_inv_pred_emitted = false; 550bf215546Sopenharmony_ci cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; 551bf215546Sopenharmony_ci } 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && 
554bf215546Sopenharmony_ci cmd_buffer->qf == RADV_QUEUE_GENERAL) { 555bf215546Sopenharmony_ci unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends; 556bf215546Sopenharmony_ci unsigned fence_offset, eop_bug_offset; 557bf215546Sopenharmony_ci void *fence_ptr; 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset, &fence_ptr); 560bf215546Sopenharmony_ci memset(fence_ptr, 0, 8); 561bf215546Sopenharmony_ci 562bf215546Sopenharmony_ci cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 563bf215546Sopenharmony_ci cmd_buffer->gfx9_fence_va += fence_offset; 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_ci radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_fence_va, 8); 566bf215546Sopenharmony_ci 567bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { 568bf215546Sopenharmony_ci /* Allocate a buffer for the EOP bug on GFX9. */ 569bf215546Sopenharmony_ci radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr); 570bf215546Sopenharmony_ci memset(fence_ptr, 0, 16 * num_db); 571bf215546Sopenharmony_ci cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 572bf215546Sopenharmony_ci cmd_buffer->gfx9_eop_bug_va += eop_bug_offset; 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_eop_bug_va, 16 * num_db); 575bf215546Sopenharmony_ci } 576bf215546Sopenharmony_ci } 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL; 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci return cmd_buffer->record_result; 581bf215546Sopenharmony_ci} 582bf215546Sopenharmony_ci 583bf215546Sopenharmony_cistatic bool 584bf215546Sopenharmony_ciradv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t min_needed) 585bf215546Sopenharmony_ci{ 
586bf215546Sopenharmony_ci uint64_t new_size; 587bf215546Sopenharmony_ci struct radeon_winsys_bo *bo = NULL; 588bf215546Sopenharmony_ci struct radv_cmd_buffer_upload *upload; 589bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_ci new_size = MAX2(min_needed, 16 * 1024); 592bf215546Sopenharmony_ci new_size = MAX2(new_size, 2 * cmd_buffer->upload.size); 593bf215546Sopenharmony_ci 594bf215546Sopenharmony_ci VkResult result = 595bf215546Sopenharmony_ci device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws), 596bf215546Sopenharmony_ci RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | 597bf215546Sopenharmony_ci RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC, 598bf215546Sopenharmony_ci RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo); 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 601bf215546Sopenharmony_ci cmd_buffer->record_result = result; 602bf215546Sopenharmony_ci return false; 603bf215546Sopenharmony_ci } 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo); 606bf215546Sopenharmony_ci if (cmd_buffer->upload.upload_bo) { 607bf215546Sopenharmony_ci upload = malloc(sizeof(*upload)); 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ci if (!upload) { 610bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 611bf215546Sopenharmony_ci device->ws->buffer_destroy(device->ws, bo); 612bf215546Sopenharmony_ci return false; 613bf215546Sopenharmony_ci } 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci memcpy(upload, &cmd_buffer->upload, sizeof(*upload)); 616bf215546Sopenharmony_ci list_add(&upload->list, &cmd_buffer->upload.list); 617bf215546Sopenharmony_ci } 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci cmd_buffer->upload.upload_bo = bo; 620bf215546Sopenharmony_ci cmd_buffer->upload.size = new_size; 621bf215546Sopenharmony_ci cmd_buffer->upload.offset 
= 0; 622bf215546Sopenharmony_ci cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo); 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci if (!cmd_buffer->upload.map) { 625bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY; 626bf215546Sopenharmony_ci return false; 627bf215546Sopenharmony_ci } 628bf215546Sopenharmony_ci 629bf215546Sopenharmony_ci return true; 630bf215546Sopenharmony_ci} 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_cibool 633bf215546Sopenharmony_ciradv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size, 634bf215546Sopenharmony_ci unsigned *out_offset, void **ptr) 635bf215546Sopenharmony_ci{ 636bf215546Sopenharmony_ci assert(size % 4 == 0); 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info; 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci /* Align to the scalar cache line size if it results in this allocation 641bf215546Sopenharmony_ci * being placed in less of them. 642bf215546Sopenharmony_ci */ 643bf215546Sopenharmony_ci unsigned offset = cmd_buffer->upload.offset; 644bf215546Sopenharmony_ci unsigned line_size = rad_info->gfx_level >= GFX10 ? 
64 : 32; 645bf215546Sopenharmony_ci unsigned gap = align(offset, line_size) - offset; 646bf215546Sopenharmony_ci if ((size & (line_size - 1)) > gap) 647bf215546Sopenharmony_ci offset = align(offset, line_size); 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci if (offset + size > cmd_buffer->upload.size) { 650bf215546Sopenharmony_ci if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size)) 651bf215546Sopenharmony_ci return false; 652bf215546Sopenharmony_ci offset = 0; 653bf215546Sopenharmony_ci } 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_ci *out_offset = offset; 656bf215546Sopenharmony_ci *ptr = cmd_buffer->upload.map + offset; 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci cmd_buffer->upload.offset = offset + size; 659bf215546Sopenharmony_ci return true; 660bf215546Sopenharmony_ci} 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_cibool 663bf215546Sopenharmony_ciradv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data, 664bf215546Sopenharmony_ci unsigned *out_offset) 665bf215546Sopenharmony_ci{ 666bf215546Sopenharmony_ci uint8_t *ptr; 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, (void **)&ptr)) 669bf215546Sopenharmony_ci return false; 670bf215546Sopenharmony_ci assert(ptr); 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci memcpy(ptr, data, size); 673bf215546Sopenharmony_ci return true; 674bf215546Sopenharmony_ci} 675bf215546Sopenharmony_ci 676bf215546Sopenharmony_civoid 677bf215546Sopenharmony_ciradv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer) 678bf215546Sopenharmony_ci{ 679bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 680bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 681bf215546Sopenharmony_ci uint64_t va; 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci va = radv_buffer_get_va(device->trace_bo); 684bf215546Sopenharmony_ci if (cmd_buffer->vk.level == 
VK_COMMAND_BUFFER_LEVEL_SECONDARY) 685bf215546Sopenharmony_ci va += 4; 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci ++cmd_buffer->state.trace_id; 688bf215546Sopenharmony_ci radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id); 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci radeon_check_space(cmd_buffer->device->ws, cs, 2); 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_NOP, 0, 0)); 693bf215546Sopenharmony_ci radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id)); 694bf215546Sopenharmony_ci} 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_cistatic void 697bf215546Sopenharmony_ciradv_ace_internal_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stage_mask, 698bf215546Sopenharmony_ci VkPipelineStageFlags2 dst_stage_mask) 699bf215546Sopenharmony_ci{ 700bf215546Sopenharmony_ci /* Update flush bits from the main cmdbuf, except the stage flush. */ 701bf215546Sopenharmony_ci cmd_buffer->ace_internal.flush_bits |= 702bf215546Sopenharmony_ci cmd_buffer->state.flush_bits & RADV_CMD_FLUSH_ALL_COMPUTE & ~RADV_CMD_FLAG_CS_PARTIAL_FLUSH; 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci /* Add stage flush only when necessary. */ 705bf215546Sopenharmony_ci if (src_stage_mask & 706bf215546Sopenharmony_ci (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV | VK_PIPELINE_STAGE_2_TRANSFER_BIT | 707bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) 708bf215546Sopenharmony_ci cmd_buffer->ace_internal.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci /* Block task shaders when we have to wait for CP DMA on the GFX cmdbuf. 
*/ 711bf215546Sopenharmony_ci if (src_stage_mask & 712bf215546Sopenharmony_ci (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | 713bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | 714bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) 715bf215546Sopenharmony_ci dst_stage_mask |= cmd_buffer->state.dma_is_busy ? VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV : 0; 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_ci /* Increment the GFX/ACE semaphore when task shaders are blocked. */ 718bf215546Sopenharmony_ci if (dst_stage_mask & 719bf215546Sopenharmony_ci (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | 720bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV)) 721bf215546Sopenharmony_ci cmd_buffer->ace_internal.sem.gfx2ace_value++; 722bf215546Sopenharmony_ci} 723bf215546Sopenharmony_ci 724bf215546Sopenharmony_cistatic void 725bf215546Sopenharmony_ciradv_ace_internal_cache_flush(struct radv_cmd_buffer *cmd_buffer) 726bf215546Sopenharmony_ci{ 727bf215546Sopenharmony_ci struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; 728bf215546Sopenharmony_ci const uint32_t flush_bits = cmd_buffer->ace_internal.flush_bits; 729bf215546Sopenharmony_ci enum rgp_flush_bits sqtt_flush_bits = 0; 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci si_cs_emit_cache_flush(ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0, 732bf215546Sopenharmony_ci true, flush_bits, &sqtt_flush_bits, 0); 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ci cmd_buffer->ace_internal.flush_bits = 0; 735bf215546Sopenharmony_ci} 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_cistatic uint64_t 738bf215546Sopenharmony_ciradv_ace_internal_sem_create(struct radv_cmd_buffer *cmd_buffer) 739bf215546Sopenharmony_ci{ 740bf215546Sopenharmony_ci /* DWORD 0: GFX->ACE semaphore (GFX blocks ACE, ie. 
ACE waits for GFX) 741bf215546Sopenharmony_ci * DWORD 1: ACE->GFX semaphore 742bf215546Sopenharmony_ci */ 743bf215546Sopenharmony_ci uint64_t sem_init = 0; 744bf215546Sopenharmony_ci uint32_t va_off = 0; 745bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_data(cmd_buffer, sizeof(uint64_t), &sem_init, &va_off)) { 746bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 747bf215546Sopenharmony_ci return 0; 748bf215546Sopenharmony_ci } 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci return radv_buffer_get_va(cmd_buffer->upload.upload_bo) + va_off; 751bf215546Sopenharmony_ci} 752bf215546Sopenharmony_ci 753bf215546Sopenharmony_cistatic bool 754bf215546Sopenharmony_ciradv_ace_internal_sem_dirty(const struct radv_cmd_buffer *cmd_buffer) 755bf215546Sopenharmony_ci{ 756bf215546Sopenharmony_ci return cmd_buffer->ace_internal.sem.gfx2ace_value != 757bf215546Sopenharmony_ci cmd_buffer->ace_internal.sem.emitted_gfx2ace_value; 758bf215546Sopenharmony_ci} 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ciALWAYS_INLINE static bool 761bf215546Sopenharmony_ciradv_flush_gfx2ace_semaphore(struct radv_cmd_buffer *cmd_buffer) 762bf215546Sopenharmony_ci{ 763bf215546Sopenharmony_ci if (!radv_ace_internal_sem_dirty(cmd_buffer)) 764bf215546Sopenharmony_ci return false; 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci if (!cmd_buffer->ace_internal.sem.va) { 767bf215546Sopenharmony_ci cmd_buffer->ace_internal.sem.va = radv_ace_internal_sem_create(cmd_buffer); 768bf215546Sopenharmony_ci if (!cmd_buffer->ace_internal.sem.va) 769bf215546Sopenharmony_ci return false; 770bf215546Sopenharmony_ci } 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci /* GFX writes a value to the semaphore which ACE can wait for.*/ 773bf215546Sopenharmony_ci si_cs_emit_write_event_eop( 774bf215546Sopenharmony_ci cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level, 775bf215546Sopenharmony_ci radv_cmd_buffer_uses_mec(cmd_buffer), 
V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, 776bf215546Sopenharmony_ci EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->ace_internal.sem.va, 777bf215546Sopenharmony_ci cmd_buffer->ace_internal.sem.gfx2ace_value, cmd_buffer->gfx9_eop_bug_va); 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci cmd_buffer->ace_internal.sem.emitted_gfx2ace_value = cmd_buffer->ace_internal.sem.gfx2ace_value; 780bf215546Sopenharmony_ci return true; 781bf215546Sopenharmony_ci} 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ciALWAYS_INLINE static void 784bf215546Sopenharmony_ciradv_wait_gfx2ace_semaphore(struct radv_cmd_buffer *cmd_buffer) 785bf215546Sopenharmony_ci{ 786bf215546Sopenharmony_ci assert(cmd_buffer->ace_internal.sem.va); 787bf215546Sopenharmony_ci struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; 788bf215546Sopenharmony_ci radeon_check_space(cmd_buffer->device->ws, ace_cs, 7); 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_ci /* ACE waits for the semaphore which GFX wrote. */ 791bf215546Sopenharmony_ci radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->ace_internal.sem.va, 792bf215546Sopenharmony_ci cmd_buffer->ace_internal.sem.gfx2ace_value, 0xffffffff); 793bf215546Sopenharmony_ci} 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_cistatic struct radeon_cmdbuf * 796bf215546Sopenharmony_ciradv_ace_internal_create(struct radv_cmd_buffer *cmd_buffer) 797bf215546Sopenharmony_ci{ 798bf215546Sopenharmony_ci assert(!cmd_buffer->ace_internal.cs); 799bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 800bf215546Sopenharmony_ci struct radeon_cmdbuf *ace_cs = device->ws->cs_create(device->ws, AMD_IP_COMPUTE); 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci if (!ace_cs) { 803bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 804bf215546Sopenharmony_ci } 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci return ace_cs; 807bf215546Sopenharmony_ci} 808bf215546Sopenharmony_ci 
809bf215546Sopenharmony_cistatic VkResult 810bf215546Sopenharmony_ciradv_ace_internal_finalize(struct radv_cmd_buffer *cmd_buffer) 811bf215546Sopenharmony_ci{ 812bf215546Sopenharmony_ci assert(cmd_buffer->ace_internal.cs); 813bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 814bf215546Sopenharmony_ci struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci /* Emit pending cache flush. */ 817bf215546Sopenharmony_ci radv_ace_internal_cache_flush(cmd_buffer); 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_ci /* Clear the ACE semaphore if it exists. 820bf215546Sopenharmony_ci * This is necessary in case the same cmd buffer is submitted again in the future. 821bf215546Sopenharmony_ci */ 822bf215546Sopenharmony_ci if (cmd_buffer->ace_internal.sem.va) { 823bf215546Sopenharmony_ci struct radeon_cmdbuf *main_cs = cmd_buffer->cs; 824bf215546Sopenharmony_ci uint64_t gfx2ace_va = cmd_buffer->ace_internal.sem.va; 825bf215546Sopenharmony_ci uint64_t ace2gfx_va = cmd_buffer->ace_internal.sem.va + 4; 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci /* ACE: write 1 to the ACE->GFX semaphore. */ 828bf215546Sopenharmony_ci si_cs_emit_write_event_eop(ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, 829bf215546Sopenharmony_ci true, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM, 830bf215546Sopenharmony_ci EOP_DATA_SEL_VALUE_32BIT, ace2gfx_va, 1, 831bf215546Sopenharmony_ci cmd_buffer->gfx9_eop_bug_va); 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci /* Wait for ACE to finish, otherwise we may risk writing 0 to the semaphore 834bf215546Sopenharmony_ci * when ACE is still waiting for it. This may not happen in practice, but 835bf215546Sopenharmony_ci * better safe than sorry. 
836bf215546Sopenharmony_ci */ 837bf215546Sopenharmony_ci radv_cp_wait_mem(main_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace2gfx_va, 1, 0xffffffff); 838bf215546Sopenharmony_ci 839bf215546Sopenharmony_ci /* GFX: clear GFX->ACE and ACE->GFX semaphores. */ 840bf215546Sopenharmony_ci radv_emit_clear_data(cmd_buffer, V_370_ME, gfx2ace_va, 8); 841bf215546Sopenharmony_ci } 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci device->ws->cs_add_buffers(ace_cs, cmd_buffer->cs); 844bf215546Sopenharmony_ci return device->ws->cs_finalize(ace_cs); 845bf215546Sopenharmony_ci} 846bf215546Sopenharmony_ci 847bf215546Sopenharmony_cistatic void 848bf215546Sopenharmony_ciradv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags) 849bf215546Sopenharmony_ci{ 850bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->thread_trace.bo)) { 851bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 852bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0)); 853bf215546Sopenharmony_ci } 854bf215546Sopenharmony_ci 855bf215546Sopenharmony_ci if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) { 856bf215546Sopenharmony_ci enum rgp_flush_bits sqtt_flush_bits = 0; 857bf215546Sopenharmony_ci assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)); 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_ci radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4); 860bf215546Sopenharmony_ci 861bf215546Sopenharmony_ci /* Force wait for graphics or compute engines to be idle. 
*/ 862bf215546Sopenharmony_ci si_cs_emit_cache_flush(cmd_buffer->cs, 863bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.gfx_level, 864bf215546Sopenharmony_ci &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va, 865bf215546Sopenharmony_ci radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits, 866bf215546Sopenharmony_ci cmd_buffer->gfx9_eop_bug_va); 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_ci if (cmd_buffer->state.graphics_pipeline && (flags & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) && 869bf215546Sopenharmony_ci radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_TASK)) { 870bf215546Sopenharmony_ci /* Force wait for compute engines to be idle on the internal cmdbuf. */ 871bf215546Sopenharmony_ci si_cs_emit_cache_flush(cmd_buffer->ace_internal.cs, 872bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0, 873bf215546Sopenharmony_ci true, RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0); 874bf215546Sopenharmony_ci } 875bf215546Sopenharmony_ci } 876bf215546Sopenharmony_ci 877bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->trace_bo)) 878bf215546Sopenharmony_ci radv_cmd_buffer_trace_emit(cmd_buffer); 879bf215546Sopenharmony_ci} 880bf215546Sopenharmony_ci 881bf215546Sopenharmony_cistatic void 882bf215546Sopenharmony_ciradv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline) 883bf215546Sopenharmony_ci{ 884bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 885bf215546Sopenharmony_ci enum amd_ip_type ring; 886bf215546Sopenharmony_ci uint32_t data[2]; 887bf215546Sopenharmony_ci uint64_t va; 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci va = radv_buffer_get_va(device->trace_bo); 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_ci ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf); 892bf215546Sopenharmony_ci 893bf215546Sopenharmony_ci switch (ring) { 894bf215546Sopenharmony_ci case AMD_IP_GFX: 
895bf215546Sopenharmony_ci va += 8; 896bf215546Sopenharmony_ci break; 897bf215546Sopenharmony_ci case AMD_IP_COMPUTE: 898bf215546Sopenharmony_ci va += 16; 899bf215546Sopenharmony_ci break; 900bf215546Sopenharmony_ci default: 901bf215546Sopenharmony_ci assert(!"invalid IP type"); 902bf215546Sopenharmony_ci } 903bf215546Sopenharmony_ci 904bf215546Sopenharmony_ci uint64_t pipeline_address = (uintptr_t)pipeline; 905bf215546Sopenharmony_ci data[0] = pipeline_address; 906bf215546Sopenharmony_ci data[1] = pipeline_address >> 32; 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); 909bf215546Sopenharmony_ci} 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_cistatic void 912bf215546Sopenharmony_ciradv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr) 913bf215546Sopenharmony_ci{ 914bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 915bf215546Sopenharmony_ci uint32_t data[2]; 916bf215546Sopenharmony_ci uint64_t va; 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci va = radv_buffer_get_va(device->trace_bo); 919bf215546Sopenharmony_ci va += 24; 920bf215546Sopenharmony_ci 921bf215546Sopenharmony_ci data[0] = vb_ptr; 922bf215546Sopenharmony_ci data[1] = vb_ptr >> 32; 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); 925bf215546Sopenharmony_ci} 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_cistatic void 928bf215546Sopenharmony_ciradv_save_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader_part *prolog) 929bf215546Sopenharmony_ci{ 930bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 931bf215546Sopenharmony_ci uint32_t data[2]; 932bf215546Sopenharmony_ci uint64_t va; 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci va = radv_buffer_get_va(device->trace_bo); 935bf215546Sopenharmony_ci va += 32; 936bf215546Sopenharmony_ci 
937bf215546Sopenharmony_ci uint64_t prolog_address = (uintptr_t)prolog; 938bf215546Sopenharmony_ci data[0] = prolog_address; 939bf215546Sopenharmony_ci data[1] = prolog_address >> 32; 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_ci radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data); 942bf215546Sopenharmony_ci} 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_civoid 945bf215546Sopenharmony_ciradv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, 946bf215546Sopenharmony_ci struct radv_descriptor_set *set, unsigned idx) 947bf215546Sopenharmony_ci{ 948bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state = 949bf215546Sopenharmony_ci radv_get_descriptors_state(cmd_buffer, bind_point); 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_ci descriptors_state->sets[idx] = set; 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci descriptors_state->valid |= (1u << idx); /* active descriptors */ 954bf215546Sopenharmony_ci descriptors_state->dirty |= (1u << idx); 955bf215546Sopenharmony_ci} 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_cistatic void 958bf215546Sopenharmony_ciradv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) 959bf215546Sopenharmony_ci{ 960bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state = 961bf215546Sopenharmony_ci radv_get_descriptors_state(cmd_buffer, bind_point); 962bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 963bf215546Sopenharmony_ci uint32_t data[MAX_SETS * 2] = {0}; 964bf215546Sopenharmony_ci uint64_t va; 965bf215546Sopenharmony_ci va = radv_buffer_get_va(device->trace_bo) + 40; 966bf215546Sopenharmony_ci 967bf215546Sopenharmony_ci u_foreach_bit(i, descriptors_state->valid) 968bf215546Sopenharmony_ci { 969bf215546Sopenharmony_ci struct radv_descriptor_set *set = descriptors_state->sets[i]; 970bf215546Sopenharmony_ci data[i * 2] = (uint64_t)(uintptr_t)set; 971bf215546Sopenharmony_ci 
data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32; 972bf215546Sopenharmony_ci } 973bf215546Sopenharmony_ci 974bf215546Sopenharmony_ci radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data); 975bf215546Sopenharmony_ci} 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_cistruct radv_userdata_info * 978bf215546Sopenharmony_ciradv_lookup_user_sgpr(struct radv_pipeline *pipeline, gl_shader_stage stage, int idx) 979bf215546Sopenharmony_ci{ 980bf215546Sopenharmony_ci struct radv_shader *shader = radv_get_shader(pipeline, stage); 981bf215546Sopenharmony_ci return &shader->info.user_sgprs_locs.shader_data[idx]; 982bf215546Sopenharmony_ci} 983bf215546Sopenharmony_ci 984bf215546Sopenharmony_cistatic void 985bf215546Sopenharmony_ciradv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs, 986bf215546Sopenharmony_ci struct radv_pipeline *pipeline, gl_shader_stage stage, int idx, 987bf215546Sopenharmony_ci uint64_t va) 988bf215546Sopenharmony_ci{ 989bf215546Sopenharmony_ci struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); 990bf215546Sopenharmony_ci uint32_t base_reg = pipeline->user_data_0[stage]; 991bf215546Sopenharmony_ci if (loc->sgpr_idx == -1) 992bf215546Sopenharmony_ci return; 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_ci assert(loc->num_sgprs == 1); 995bf215546Sopenharmony_ci 996bf215546Sopenharmony_ci radv_emit_shader_pointer(device, cs, base_reg + loc->sgpr_idx * 4, va, false); 997bf215546Sopenharmony_ci} 998bf215546Sopenharmony_ci 999bf215546Sopenharmony_cistatic void 1000bf215546Sopenharmony_ciradv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *cs, 1001bf215546Sopenharmony_ci struct radv_pipeline *pipeline, 1002bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state, 1003bf215546Sopenharmony_ci gl_shader_stage stage) 1004bf215546Sopenharmony_ci{ 1005bf215546Sopenharmony_ci uint32_t sh_base = pipeline->user_data_0[stage]; 1006bf215546Sopenharmony_ci 
struct radv_userdata_locations *locs = &pipeline->shaders[stage]->info.user_sgprs_locs; 1007bf215546Sopenharmony_ci unsigned mask = locs->descriptor_sets_enabled; 1008bf215546Sopenharmony_ci 1009bf215546Sopenharmony_ci mask &= descriptors_state->dirty & descriptors_state->valid; 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_ci while (mask) { 1012bf215546Sopenharmony_ci int start, count; 1013bf215546Sopenharmony_ci 1014bf215546Sopenharmony_ci u_bit_scan_consecutive_range(&mask, &start, &count); 1015bf215546Sopenharmony_ci 1016bf215546Sopenharmony_ci struct radv_userdata_info *loc = &locs->descriptor_sets[start]; 1017bf215546Sopenharmony_ci unsigned sh_offset = sh_base + loc->sgpr_idx * 4; 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci radv_emit_shader_pointer_head(cs, sh_offset, count, true); 1020bf215546Sopenharmony_ci for (int i = 0; i < count; i++) { 1021bf215546Sopenharmony_ci struct radv_descriptor_set *set = descriptors_state->sets[start + i]; 1022bf215546Sopenharmony_ci 1023bf215546Sopenharmony_ci radv_emit_shader_pointer_body(device, cs, set->header.va, true); 1024bf215546Sopenharmony_ci } 1025bf215546Sopenharmony_ci } 1026bf215546Sopenharmony_ci} 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci/** 1029bf215546Sopenharmony_ci * Convert the user sample locations to hardware sample locations (the values 1030bf215546Sopenharmony_ci * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*). 
1031bf215546Sopenharmony_ci */ 1032bf215546Sopenharmony_cistatic void 1033bf215546Sopenharmony_ciradv_convert_user_sample_locs(struct radv_sample_locations_state *state, uint32_t x, uint32_t y, 1034bf215546Sopenharmony_ci VkOffset2D *sample_locs) 1035bf215546Sopenharmony_ci{ 1036bf215546Sopenharmony_ci uint32_t x_offset = x % state->grid_size.width; 1037bf215546Sopenharmony_ci uint32_t y_offset = y % state->grid_size.height; 1038bf215546Sopenharmony_ci uint32_t num_samples = (uint32_t)state->per_pixel; 1039bf215546Sopenharmony_ci VkSampleLocationEXT *user_locs; 1040bf215546Sopenharmony_ci uint32_t pixel_offset; 1041bf215546Sopenharmony_ci 1042bf215546Sopenharmony_ci pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples; 1043bf215546Sopenharmony_ci 1044bf215546Sopenharmony_ci assert(pixel_offset <= MAX_SAMPLE_LOCATIONS); 1045bf215546Sopenharmony_ci user_locs = &state->locations[pixel_offset]; 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci for (uint32_t i = 0; i < num_samples; i++) { 1048bf215546Sopenharmony_ci float shifted_pos_x = user_locs[i].x - 0.5; 1049bf215546Sopenharmony_ci float shifted_pos_y = user_locs[i].y - 0.5; 1050bf215546Sopenharmony_ci 1051bf215546Sopenharmony_ci int32_t scaled_pos_x = floorf(shifted_pos_x * 16); 1052bf215546Sopenharmony_ci int32_t scaled_pos_y = floorf(shifted_pos_y * 16); 1053bf215546Sopenharmony_ci 1054bf215546Sopenharmony_ci sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7); 1055bf215546Sopenharmony_ci sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7); 1056bf215546Sopenharmony_ci } 1057bf215546Sopenharmony_ci} 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci/** 1060bf215546Sopenharmony_ci * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample 1061bf215546Sopenharmony_ci * locations. 
1062bf215546Sopenharmony_ci */ 1063bf215546Sopenharmony_cistatic void 1064bf215546Sopenharmony_ciradv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs, 1065bf215546Sopenharmony_ci uint32_t *sample_locs_pixel) 1066bf215546Sopenharmony_ci{ 1067bf215546Sopenharmony_ci for (uint32_t i = 0; i < num_samples; i++) { 1068bf215546Sopenharmony_ci uint32_t sample_reg_idx = i / 4; 1069bf215546Sopenharmony_ci uint32_t sample_loc_idx = i % 4; 1070bf215546Sopenharmony_ci int32_t pos_x = sample_locs[i].x; 1071bf215546Sopenharmony_ci int32_t pos_y = sample_locs[i].y; 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci uint32_t shift_x = 8 * sample_loc_idx; 1074bf215546Sopenharmony_ci uint32_t shift_y = shift_x + 4; 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_ci sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x; 1077bf215546Sopenharmony_ci sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y; 1078bf215546Sopenharmony_ci } 1079bf215546Sopenharmony_ci} 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_ci/** 1082bf215546Sopenharmony_ci * Compute the PA_SC_CENTROID_PRIORITY_* mask based on the top left hardware 1083bf215546Sopenharmony_ci * sample locations. 1084bf215546Sopenharmony_ci */ 1085bf215546Sopenharmony_cistatic uint64_t 1086bf215546Sopenharmony_ciradv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs, 1087bf215546Sopenharmony_ci uint32_t num_samples) 1088bf215546Sopenharmony_ci{ 1089bf215546Sopenharmony_ci uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities)); 1090bf215546Sopenharmony_ci uint32_t sample_mask = num_samples - 1; 1091bf215546Sopenharmony_ci uint32_t *distances = alloca(num_samples * sizeof(*distances)); 1092bf215546Sopenharmony_ci uint64_t centroid_priority = 0; 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_ci /* Compute the distances from center for each sample. 
*/ 1095bf215546Sopenharmony_ci for (int i = 0; i < num_samples; i++) { 1096bf215546Sopenharmony_ci distances[i] = (sample_locs[i].x * sample_locs[i].x) + (sample_locs[i].y * sample_locs[i].y); 1097bf215546Sopenharmony_ci } 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_ci /* Compute the centroid priorities by looking at the distances array. */ 1100bf215546Sopenharmony_ci for (int i = 0; i < num_samples; i++) { 1101bf215546Sopenharmony_ci uint32_t min_idx = 0; 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci for (int j = 1; j < num_samples; j++) { 1104bf215546Sopenharmony_ci if (distances[j] < distances[min_idx]) 1105bf215546Sopenharmony_ci min_idx = j; 1106bf215546Sopenharmony_ci } 1107bf215546Sopenharmony_ci 1108bf215546Sopenharmony_ci centroid_priorities[i] = min_idx; 1109bf215546Sopenharmony_ci distances[min_idx] = 0xffffffff; 1110bf215546Sopenharmony_ci } 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci /* Compute the final centroid priority. */ 1113bf215546Sopenharmony_ci for (int i = 0; i < 8; i++) { 1114bf215546Sopenharmony_ci centroid_priority |= centroid_priorities[i & sample_mask] << (i * 4); 1115bf215546Sopenharmony_ci } 1116bf215546Sopenharmony_ci 1117bf215546Sopenharmony_ci return centroid_priority << 32 | centroid_priority; 1118bf215546Sopenharmony_ci} 1119bf215546Sopenharmony_ci 1120bf215546Sopenharmony_ci/** 1121bf215546Sopenharmony_ci * Emit the sample locations that are specified with VK_EXT_sample_locations. 
1122bf215546Sopenharmony_ci */ 1123bf215546Sopenharmony_cistatic void 1124bf215546Sopenharmony_ciradv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer) 1125bf215546Sopenharmony_ci{ 1126bf215546Sopenharmony_ci struct radv_sample_locations_state *sample_location = &cmd_buffer->state.dynamic.sample_location; 1127bf215546Sopenharmony_ci uint32_t num_samples = (uint32_t)sample_location->per_pixel; 1128bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 1129bf215546Sopenharmony_ci uint32_t sample_locs_pixel[4][2] = {0}; 1130bf215546Sopenharmony_ci VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */ 1131bf215546Sopenharmony_ci uint32_t max_sample_dist = 0; 1132bf215546Sopenharmony_ci uint64_t centroid_priority; 1133bf215546Sopenharmony_ci 1134bf215546Sopenharmony_ci if (!cmd_buffer->state.dynamic.sample_location.count) 1135bf215546Sopenharmony_ci return; 1136bf215546Sopenharmony_ci 1137bf215546Sopenharmony_ci /* Convert the user sample locations to hardware sample locations. */ 1138bf215546Sopenharmony_ci radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]); 1139bf215546Sopenharmony_ci radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]); 1140bf215546Sopenharmony_ci radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]); 1141bf215546Sopenharmony_ci radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]); 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */ 1144bf215546Sopenharmony_ci for (uint32_t i = 0; i < 4; i++) { 1145bf215546Sopenharmony_ci radv_compute_sample_locs_pixel(num_samples, sample_locs[i], sample_locs_pixel[i]); 1146bf215546Sopenharmony_ci } 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci /* Compute the PA_SC_CENTROID_PRIORITY_* mask. 
*/ 1149bf215546Sopenharmony_ci centroid_priority = radv_compute_centroid_priority(cmd_buffer, sample_locs[0], num_samples); 1150bf215546Sopenharmony_ci 1151bf215546Sopenharmony_ci /* Compute the maximum sample distance from the specified locations. */ 1152bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; ++i) { 1153bf215546Sopenharmony_ci for (uint32_t j = 0; j < num_samples; j++) { 1154bf215546Sopenharmony_ci VkOffset2D offset = sample_locs[i][j]; 1155bf215546Sopenharmony_ci max_sample_dist = MAX2(max_sample_dist, MAX2(abs(offset.x), abs(offset.y))); 1156bf215546Sopenharmony_ci } 1157bf215546Sopenharmony_ci } 1158bf215546Sopenharmony_ci 1159bf215546Sopenharmony_ci /* Emit the specified user sample locations. */ 1160bf215546Sopenharmony_ci switch (num_samples) { 1161bf215546Sopenharmony_ci case 2: 1162bf215546Sopenharmony_ci case 4: 1163bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 1164bf215546Sopenharmony_ci sample_locs_pixel[0][0]); 1165bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 1166bf215546Sopenharmony_ci sample_locs_pixel[1][0]); 1167bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 1168bf215546Sopenharmony_ci sample_locs_pixel[2][0]); 1169bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 1170bf215546Sopenharmony_ci sample_locs_pixel[3][0]); 1171bf215546Sopenharmony_ci break; 1172bf215546Sopenharmony_ci case 8: 1173bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0, 1174bf215546Sopenharmony_ci sample_locs_pixel[0][0]); 1175bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0, 1176bf215546Sopenharmony_ci sample_locs_pixel[1][0]); 1177bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0, 1178bf215546Sopenharmony_ci sample_locs_pixel[2][0]); 
1179bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0, 1180bf215546Sopenharmony_ci sample_locs_pixel[3][0]); 1181bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1, 1182bf215546Sopenharmony_ci sample_locs_pixel[0][1]); 1183bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1, 1184bf215546Sopenharmony_ci sample_locs_pixel[1][1]); 1185bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1, 1186bf215546Sopenharmony_ci sample_locs_pixel[2][1]); 1187bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1, 1188bf215546Sopenharmony_ci sample_locs_pixel[3][1]); 1189bf215546Sopenharmony_ci break; 1190bf215546Sopenharmony_ci default: 1191bf215546Sopenharmony_ci unreachable("invalid number of samples"); 1192bf215546Sopenharmony_ci } 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_ci /* Emit the maximum sample distance and the centroid priority. 
*/ 1195bf215546Sopenharmony_ci radeon_set_context_reg_rmw(cs, R_028BE0_PA_SC_AA_CONFIG, 1196bf215546Sopenharmony_ci S_028BE0_MAX_SAMPLE_DIST(max_sample_dist), ~C_028BE0_MAX_SAMPLE_DIST); 1197bf215546Sopenharmony_ci 1198bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2); 1199bf215546Sopenharmony_ci radeon_emit(cs, centroid_priority); 1200bf215546Sopenharmony_ci radeon_emit(cs, centroid_priority >> 32); 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 1203bf215546Sopenharmony_ci} 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_cistatic void 1206bf215546Sopenharmony_ciradv_emit_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, 1207bf215546Sopenharmony_ci struct radv_pipeline *pipeline, gl_shader_stage stage, int idx, 1208bf215546Sopenharmony_ci uint32_t *values) 1209bf215546Sopenharmony_ci{ 1210bf215546Sopenharmony_ci struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx); 1211bf215546Sopenharmony_ci uint32_t base_reg = pipeline->user_data_0[stage]; 1212bf215546Sopenharmony_ci if (loc->sgpr_idx == -1) 1213bf215546Sopenharmony_ci return; 1214bf215546Sopenharmony_ci 1215bf215546Sopenharmony_ci radeon_check_space(device->ws, cs, 2 + loc->num_sgprs); 1216bf215546Sopenharmony_ci 1217bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, base_reg + loc->sgpr_idx * 4, loc->num_sgprs); 1218bf215546Sopenharmony_ci radeon_emit_array(cs, values, loc->num_sgprs); 1219bf215546Sopenharmony_ci} 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_cistatic void 1222bf215546Sopenharmony_ciradv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer, 1223bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline) 1224bf215546Sopenharmony_ci{ 1225bf215546Sopenharmony_ci int num_samples = pipeline->ms.num_samples; 1226bf215546Sopenharmony_ci struct radv_graphics_pipeline *old_pipeline = 
cmd_buffer->state.emitted_graphics_pipeline; 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_ci if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions) 1229bf215546Sopenharmony_ci cmd_buffer->sample_positions_needed = true; 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_ci if (old_pipeline && num_samples == old_pipeline->ms.num_samples) 1232bf215546Sopenharmony_ci return; 1233bf215546Sopenharmony_ci 1234bf215546Sopenharmony_ci radv_emit_default_sample_locations(cmd_buffer->cs, num_samples); 1235bf215546Sopenharmony_ci 1236bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 1237bf215546Sopenharmony_ci} 1238bf215546Sopenharmony_ci 1239bf215546Sopenharmony_cistatic void 1240bf215546Sopenharmony_ciradv_update_binning_state(struct radv_cmd_buffer *cmd_buffer, 1241bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline) 1242bf215546Sopenharmony_ci{ 1243bf215546Sopenharmony_ci const struct radv_graphics_pipeline *old_pipeline = cmd_buffer->state.emitted_graphics_pipeline; 1244bf215546Sopenharmony_ci 1245bf215546Sopenharmony_ci if (pipeline->base.device->physical_device->rad_info.gfx_level < GFX9) 1246bf215546Sopenharmony_ci return; 1247bf215546Sopenharmony_ci 1248bf215546Sopenharmony_ci if (old_pipeline && 1249bf215546Sopenharmony_ci old_pipeline->binning.pa_sc_binner_cntl_0 == 1250bf215546Sopenharmony_ci pipeline->binning.pa_sc_binner_cntl_0) 1251bf215546Sopenharmony_ci return; 1252bf215546Sopenharmony_ci 1253bf215546Sopenharmony_ci bool binning_flush = false; 1254bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 || 1255bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 || 1256bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 || 1257bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { 1258bf215546Sopenharmony_ci binning_flush = 
!old_pipeline || 1259bf215546Sopenharmony_ci G_028C44_BINNING_MODE(old_pipeline->binning.pa_sc_binner_cntl_0) != 1260bf215546Sopenharmony_ci G_028C44_BINNING_MODE(pipeline->binning.pa_sc_binner_cntl_0); 1261bf215546Sopenharmony_ci } 1262bf215546Sopenharmony_ci 1263bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0, 1264bf215546Sopenharmony_ci pipeline->binning.pa_sc_binner_cntl_0 | 1265bf215546Sopenharmony_ci S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush)); 1266bf215546Sopenharmony_ci 1267bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 1268bf215546Sopenharmony_ci} 1269bf215546Sopenharmony_ci 1270bf215546Sopenharmony_cistatic void 1271bf215546Sopenharmony_ciradv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader) 1272bf215546Sopenharmony_ci{ 1273bf215546Sopenharmony_ci uint64_t va; 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci if (!shader) 1276bf215546Sopenharmony_ci return; 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci va = radv_shader_get_va(shader); 1279bf215546Sopenharmony_ci 1280bf215546Sopenharmony_ci si_cp_dma_prefetch(cmd_buffer, va, shader->code_size); 1281bf215546Sopenharmony_ci} 1282bf215546Sopenharmony_ci 1283bf215546Sopenharmony_cistatic void 1284bf215546Sopenharmony_ciradv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer, 1285bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline, bool first_stage_only) 1286bf215546Sopenharmony_ci{ 1287bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 1288bf215546Sopenharmony_ci uint32_t mask = state->prefetch_L2_mask; 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_ci /* Fast prefetch path for starting draws as soon as possible. 
*/ 1291bf215546Sopenharmony_ci if (first_stage_only) 1292bf215546Sopenharmony_ci mask &= RADV_PREFETCH_VS | RADV_PREFETCH_VBO_DESCRIPTORS | RADV_PREFETCH_MS; 1293bf215546Sopenharmony_ci 1294bf215546Sopenharmony_ci if (mask & RADV_PREFETCH_VS) 1295bf215546Sopenharmony_ci radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_VERTEX]); 1296bf215546Sopenharmony_ci 1297bf215546Sopenharmony_ci if (mask & RADV_PREFETCH_MS) 1298bf215546Sopenharmony_ci radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_MESH]); 1299bf215546Sopenharmony_ci 1300bf215546Sopenharmony_ci if (mask & RADV_PREFETCH_VBO_DESCRIPTORS) 1301bf215546Sopenharmony_ci si_cp_dma_prefetch(cmd_buffer, state->vb_va, pipeline->vb_desc_alloc_size); 1302bf215546Sopenharmony_ci 1303bf215546Sopenharmony_ci if (mask & RADV_PREFETCH_TCS) 1304bf215546Sopenharmony_ci radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_TESS_CTRL]); 1305bf215546Sopenharmony_ci 1306bf215546Sopenharmony_ci if (mask & RADV_PREFETCH_TES) 1307bf215546Sopenharmony_ci radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_TESS_EVAL]); 1308bf215546Sopenharmony_ci 1309bf215546Sopenharmony_ci if (mask & RADV_PREFETCH_GS) { 1310bf215546Sopenharmony_ci radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_GEOMETRY]); 1311bf215546Sopenharmony_ci if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) 1312bf215546Sopenharmony_ci radv_emit_shader_prefetch(cmd_buffer, pipeline->base.gs_copy_shader); 1313bf215546Sopenharmony_ci } 1314bf215546Sopenharmony_ci 1315bf215546Sopenharmony_ci if (mask & RADV_PREFETCH_PS) 1316bf215546Sopenharmony_ci radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_FRAGMENT]); 1317bf215546Sopenharmony_ci 1318bf215546Sopenharmony_ci state->prefetch_L2_mask &= ~mask; 1319bf215546Sopenharmony_ci} 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_cistatic void 
1322bf215546Sopenharmony_ciradv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer) 1323bf215546Sopenharmony_ci{ 1324bf215546Sopenharmony_ci if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed) 1325bf215546Sopenharmony_ci return; 1326bf215546Sopenharmony_ci 1327bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 1328bf215546Sopenharmony_ci const struct radv_subpass *subpass = cmd_buffer->state.subpass; 1329bf215546Sopenharmony_ci 1330bf215546Sopenharmony_ci unsigned sx_ps_downconvert = 0; 1331bf215546Sopenharmony_ci unsigned sx_blend_opt_epsilon = 0; 1332bf215546Sopenharmony_ci unsigned sx_blend_opt_control = 0; 1333bf215546Sopenharmony_ci 1334bf215546Sopenharmony_ci for (unsigned i = 0; i < subpass->color_count; ++i) { 1335bf215546Sopenharmony_ci unsigned format, swap; 1336bf215546Sopenharmony_ci bool has_alpha, has_rgb; 1337bf215546Sopenharmony_ci if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) { 1338bf215546Sopenharmony_ci /* We don't set the DISABLE bits, because the HW can't have holes, 1339bf215546Sopenharmony_ci * so the SPI color format is set to 32-bit 1-component. */ 1340bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 1341bf215546Sopenharmony_ci continue; 1342bf215546Sopenharmony_ci } 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_ci int idx = subpass->color_attachments[i].attachment; 1345bf215546Sopenharmony_ci if (cmd_buffer->state.attachments) { 1346bf215546Sopenharmony_ci struct radv_color_buffer_info *cb = &cmd_buffer->state.attachments[idx].cb; 1347bf215546Sopenharmony_ci 1348bf215546Sopenharmony_ci format = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 1349bf215546Sopenharmony_ci ? 
G_028C70_FORMAT_GFX11(cb->cb_color_info) 1350bf215546Sopenharmony_ci : G_028C70_FORMAT_GFX6(cb->cb_color_info); 1351bf215546Sopenharmony_ci swap = G_028C70_COMP_SWAP(cb->cb_color_info); 1352bf215546Sopenharmony_ci has_alpha = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 1353bf215546Sopenharmony_ci ? !G_028C74_FORCE_DST_ALPHA_1_GFX11(cb->cb_color_attrib) 1354bf215546Sopenharmony_ci : !G_028C74_FORCE_DST_ALPHA_1_GFX6(cb->cb_color_attrib); 1355bf215546Sopenharmony_ci } else { 1356bf215546Sopenharmony_ci VkFormat fmt = cmd_buffer->state.pass->attachments[idx].format; 1357bf215546Sopenharmony_ci format = radv_translate_colorformat(fmt); 1358bf215546Sopenharmony_ci swap = radv_translate_colorswap(fmt, false); 1359bf215546Sopenharmony_ci has_alpha = vk_format_description(fmt)->swizzle[3] != PIPE_SWIZZLE_1; 1360bf215546Sopenharmony_ci } 1361bf215546Sopenharmony_ci 1362bf215546Sopenharmony_ci uint32_t spi_format = (pipeline->col_format >> (i * 4)) & 0xf; 1363bf215546Sopenharmony_ci uint32_t colormask = (pipeline->cb_target_mask >> (i * 4)) & 0xf; 1364bf215546Sopenharmony_ci 1365bf215546Sopenharmony_ci if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || format == V_028C70_COLOR_32) 1366bf215546Sopenharmony_ci has_rgb = !has_alpha; 1367bf215546Sopenharmony_ci else 1368bf215546Sopenharmony_ci has_rgb = true; 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci /* Check the colormask and export format. 
*/ 1371bf215546Sopenharmony_ci if (!(colormask & 0x7)) 1372bf215546Sopenharmony_ci has_rgb = false; 1373bf215546Sopenharmony_ci if (!(colormask & 0x8)) 1374bf215546Sopenharmony_ci has_alpha = false; 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_ZERO) { 1377bf215546Sopenharmony_ci has_rgb = false; 1378bf215546Sopenharmony_ci has_alpha = false; 1379bf215546Sopenharmony_ci } 1380bf215546Sopenharmony_ci 1381bf215546Sopenharmony_ci /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha 1382bf215546Sopenharmony_ci * optimization, even though it has no alpha. */ 1383bf215546Sopenharmony_ci if (has_rgb && format == V_028C70_COLOR_5_9_9_9) 1384bf215546Sopenharmony_ci has_alpha = true; 1385bf215546Sopenharmony_ci 1386bf215546Sopenharmony_ci /* Disable value checking for disabled channels. */ 1387bf215546Sopenharmony_ci if (!has_rgb) 1388bf215546Sopenharmony_ci sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 1389bf215546Sopenharmony_ci if (!has_alpha) 1390bf215546Sopenharmony_ci sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 1391bf215546Sopenharmony_ci 1392bf215546Sopenharmony_ci /* Enable down-conversion for 32bpp and smaller formats. */ 1393bf215546Sopenharmony_ci switch (format) { 1394bf215546Sopenharmony_ci case V_028C70_COLOR_8: 1395bf215546Sopenharmony_ci case V_028C70_COLOR_8_8: 1396bf215546Sopenharmony_ci case V_028C70_COLOR_8_8_8_8: 1397bf215546Sopenharmony_ci /* For 1 and 2-channel formats, use the superset thereof. 
*/ 1398bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 1399bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 1400bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 1401bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 1402bf215546Sopenharmony_ci sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 1403bf215546Sopenharmony_ci } 1404bf215546Sopenharmony_ci break; 1405bf215546Sopenharmony_ci 1406bf215546Sopenharmony_ci case V_028C70_COLOR_5_6_5: 1407bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 1408bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 1409bf215546Sopenharmony_ci sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 1410bf215546Sopenharmony_ci } 1411bf215546Sopenharmony_ci break; 1412bf215546Sopenharmony_ci 1413bf215546Sopenharmony_ci case V_028C70_COLOR_1_5_5_5: 1414bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 1415bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 1416bf215546Sopenharmony_ci sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 1417bf215546Sopenharmony_ci } 1418bf215546Sopenharmony_ci break; 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci case V_028C70_COLOR_4_4_4_4: 1421bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 1422bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 1423bf215546Sopenharmony_ci sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 1424bf215546Sopenharmony_ci } 1425bf215546Sopenharmony_ci break; 1426bf215546Sopenharmony_ci 1427bf215546Sopenharmony_ci case V_028C70_COLOR_32: 1428bf215546Sopenharmony_ci if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R) 1429bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 1430bf215546Sopenharmony_ci else if (swap 
== V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR) 1431bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 1432bf215546Sopenharmony_ci break; 1433bf215546Sopenharmony_ci 1434bf215546Sopenharmony_ci case V_028C70_COLOR_16: 1435bf215546Sopenharmony_ci case V_028C70_COLOR_16_16: 1436bf215546Sopenharmony_ci /* For 1-channel formats, use the superset thereof. */ 1437bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 1438bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 1439bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 1440bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 1441bf215546Sopenharmony_ci if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV) 1442bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 1443bf215546Sopenharmony_ci else 1444bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 1445bf215546Sopenharmony_ci } 1446bf215546Sopenharmony_ci break; 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_ci case V_028C70_COLOR_10_11_11: 1449bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 1450bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 1451bf215546Sopenharmony_ci break; 1452bf215546Sopenharmony_ci 1453bf215546Sopenharmony_ci case V_028C70_COLOR_2_10_10_10: 1454bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 1455bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 1456bf215546Sopenharmony_ci sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 1457bf215546Sopenharmony_ci } 1458bf215546Sopenharmony_ci break; 1459bf215546Sopenharmony_ci case V_028C70_COLOR_5_9_9_9: 1460bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 1461bf215546Sopenharmony_ci sx_ps_downconvert |= 
V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); 1462bf215546Sopenharmony_ci break; 1463bf215546Sopenharmony_ci } 1464bf215546Sopenharmony_ci } 1465bf215546Sopenharmony_ci 1466bf215546Sopenharmony_ci /* Do not set the DISABLE bits for the unused attachments, as that 1467bf215546Sopenharmony_ci * breaks dual source blending in SkQP and does not seem to improve 1468bf215546Sopenharmony_ci * performance. */ 1469bf215546Sopenharmony_ci 1470bf215546Sopenharmony_ci if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert && 1471bf215546Sopenharmony_ci sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon && 1472bf215546Sopenharmony_ci sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control) 1473bf215546Sopenharmony_ci return; 1474bf215546Sopenharmony_ci 1475bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3); 1476bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, sx_ps_downconvert); 1477bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon); 1478bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, sx_blend_opt_control); 1479bf215546Sopenharmony_ci 1480bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 1481bf215546Sopenharmony_ci 1482bf215546Sopenharmony_ci cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert; 1483bf215546Sopenharmony_ci cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon; 1484bf215546Sopenharmony_ci cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control; 1485bf215546Sopenharmony_ci} 1486bf215546Sopenharmony_ci 1487bf215546Sopenharmony_cistatic void 1488bf215546Sopenharmony_ciradv_emit_batch_break_on_new_ps(struct radv_cmd_buffer *cmd_buffer) 1489bf215546Sopenharmony_ci{ 1490bf215546Sopenharmony_ci if (!cmd_buffer->device->pbb_allowed) 1491bf215546Sopenharmony_ci return; 1492bf215546Sopenharmony_ci 1493bf215546Sopenharmony_ci struct radv_binning_settings settings = 1494bf215546Sopenharmony_ci 
radv_get_binning_settings(cmd_buffer->device->physical_device); 1495bf215546Sopenharmony_ci bool break_for_new_ps = 1496bf215546Sopenharmony_ci (!cmd_buffer->state.emitted_graphics_pipeline || 1497bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT] != 1498bf215546Sopenharmony_ci cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]) && 1499bf215546Sopenharmony_ci (settings.context_states_per_bin > 1 || settings.persistent_states_per_bin > 1); 1500bf215546Sopenharmony_ci bool break_for_new_cb_target_mask = 1501bf215546Sopenharmony_ci (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE) && 1502bf215546Sopenharmony_ci settings.context_states_per_bin > 1; 1503bf215546Sopenharmony_ci 1504bf215546Sopenharmony_ci if (!break_for_new_ps && !break_for_new_cb_target_mask) 1505bf215546Sopenharmony_ci return; 1506bf215546Sopenharmony_ci 1507bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 1508bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 1509bf215546Sopenharmony_ci} 1510bf215546Sopenharmony_ci 1511bf215546Sopenharmony_cistatic void 1512bf215546Sopenharmony_ciradv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer) 1513bf215546Sopenharmony_ci{ 1514bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ci if (cmd_buffer->state.emitted_graphics_pipeline == pipeline) 1517bf215546Sopenharmony_ci return; 1518bf215546Sopenharmony_ci 1519bf215546Sopenharmony_ci radv_update_multisample_state(cmd_buffer, pipeline); 1520bf215546Sopenharmony_ci radv_update_binning_state(cmd_buffer, pipeline); 1521bf215546Sopenharmony_ci 1522bf215546Sopenharmony_ci cmd_buffer->scratch_size_per_wave_needed = 1523bf215546Sopenharmony_ci MAX2(cmd_buffer->scratch_size_per_wave_needed, pipeline->base.scratch_bytes_per_wave); 
1524bf215546Sopenharmony_ci cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted, pipeline->base.max_waves); 1525bf215546Sopenharmony_ci 1526bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline || 1527bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->negative_one_to_one != pipeline->negative_one_to_one || 1528bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->depth_clamp_mode != pipeline->depth_clamp_mode) 1529bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT; 1530bf215546Sopenharmony_ci 1531bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline || 1532bf215546Sopenharmony_ci radv_rast_prim_is_points_or_lines(cmd_buffer->state.emitted_graphics_pipeline->rast_prim) != radv_rast_prim_is_points_or_lines(pipeline->rast_prim) || 1533bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->line_width != pipeline->line_width) 1534bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR; 1535bf215546Sopenharmony_ci 1536bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline || 1537bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->pa_su_sc_mode_cntl != pipeline->pa_su_sc_mode_cntl) 1538bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | 1539bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | 1540bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS; 1541bf215546Sopenharmony_ci 1542bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline || 1543bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->pa_cl_clip_cntl != pipeline->pa_cl_clip_cntl) 1544bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE; 1545bf215546Sopenharmony_ci 1546bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline || 1547bf215546Sopenharmony_ci 
cmd_buffer->state.emitted_graphics_pipeline->cb_color_control != pipeline->cb_color_control) 1548bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP; 1549bf215546Sopenharmony_ci 1550bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline) 1551bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY | 1552bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS | 1553bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS | 1554bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE; 1555bf215546Sopenharmony_ci 1556bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline || 1557bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->db_depth_control != pipeline->db_depth_control) 1558bf215546Sopenharmony_ci cmd_buffer->state.dirty |= 1559bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | 1560bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | 1561bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP; 1562bf215546Sopenharmony_ci 1563bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline) 1564bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP; 1565bf215546Sopenharmony_ci 1566bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline || 1567bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->cb_target_mask != pipeline->cb_target_mask) { 1568bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE; 1569bf215546Sopenharmony_ci } 1570bf215546Sopenharmony_ci 1571bf215546Sopenharmony_ci radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw); 1572bf215546Sopenharmony_ci 1573bf215546Sopenharmony_ci if (pipeline->has_ngg_culling && 
1574bf215546Sopenharmony_ci pipeline->last_vgt_api_stage != MESA_SHADER_GEOMETRY && 1575bf215546Sopenharmony_ci !cmd_buffer->state.last_nggc_settings) { 1576bf215546Sopenharmony_ci /* The already emitted RSRC2 contains the LDS required for NGG culling. 1577bf215546Sopenharmony_ci * Culling is currently disabled, so re-emit RSRC2 to reduce LDS usage. 1578bf215546Sopenharmony_ci * API GS always needs LDS, so this isn't useful there. 1579bf215546Sopenharmony_ci */ 1580bf215546Sopenharmony_ci struct radv_shader *v = pipeline->base.shaders[pipeline->last_vgt_api_stage]; 1581bf215546Sopenharmony_ci radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, 1582bf215546Sopenharmony_ci (v->config.rsrc2 & C_00B22C_LDS_SIZE) | 1583bf215546Sopenharmony_ci S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling)); 1584bf215546Sopenharmony_ci } 1585bf215546Sopenharmony_ci 1586bf215546Sopenharmony_ci if (!cmd_buffer->state.emitted_graphics_pipeline || 1587bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs.cdw != pipeline->base.ctx_cs.cdw || 1588bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs_hash != pipeline->base.ctx_cs_hash || 1589bf215546Sopenharmony_ci memcmp(cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs.buf, pipeline->base.ctx_cs.buf, 1590bf215546Sopenharmony_ci pipeline->base.ctx_cs.cdw * 4)) { 1591bf215546Sopenharmony_ci radeon_emit_array(cmd_buffer->cs, pipeline->base.ctx_cs.buf, pipeline->base.ctx_cs.cdw); 1592bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 1593bf215546Sopenharmony_ci } 1594bf215546Sopenharmony_ci 1595bf215546Sopenharmony_ci radv_emit_batch_break_on_new_ps(cmd_buffer); 1596bf215546Sopenharmony_ci 1597bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.slab_bo); 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->trace_bo)) 1600bf215546Sopenharmony_ci 
radv_save_pipeline(cmd_buffer, &pipeline->base); 1601bf215546Sopenharmony_ci 1602bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline = pipeline; 1603bf215546Sopenharmony_ci 1604bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE; 1605bf215546Sopenharmony_ci} 1606bf215546Sopenharmony_ci 1607bf215546Sopenharmony_cistatic void 1608bf215546Sopenharmony_ciradv_emit_viewport(struct radv_cmd_buffer *cmd_buffer) 1609bf215546Sopenharmony_ci{ 1610bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 1611bf215546Sopenharmony_ci const struct radv_viewport_state *viewport = &cmd_buffer->state.dynamic.viewport; 1612bf215546Sopenharmony_ci int i; 1613bf215546Sopenharmony_ci const unsigned count = viewport->count; 1614bf215546Sopenharmony_ci 1615bf215546Sopenharmony_ci assert(count); 1616bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_02843C_PA_CL_VPORT_XSCALE, count * 6); 1617bf215546Sopenharmony_ci 1618bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 1619bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].scale[0])); 1620bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].translate[0])); 1621bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].scale[1])); 1622bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].translate[1])); 1623bf215546Sopenharmony_ci 1624bf215546Sopenharmony_ci double scale_z, translate_z; 1625bf215546Sopenharmony_ci if (pipeline->negative_one_to_one) { 1626bf215546Sopenharmony_ci scale_z = viewport->xform[i].scale[2] * 0.5f; 1627bf215546Sopenharmony_ci translate_z = (viewport->xform[i].translate[2] + viewport->viewports[i].maxDepth) * 0.5f; 1628bf215546Sopenharmony_ci } else { 1629bf215546Sopenharmony_ci scale_z = viewport->xform[i].scale[2]; 1630bf215546Sopenharmony_ci translate_z = viewport->xform[i].translate[2]; 1631bf215546Sopenharmony_ci 
1632bf215546Sopenharmony_ci } 1633bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(scale_z)); 1634bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(translate_z)); 1635bf215546Sopenharmony_ci } 1636bf215546Sopenharmony_ci 1637bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0, count * 2); 1638bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 1639bf215546Sopenharmony_ci float zmin, zmax; 1640bf215546Sopenharmony_ci 1641bf215546Sopenharmony_ci if (pipeline->depth_clamp_mode == RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE) { 1642bf215546Sopenharmony_ci zmin = 0.0f; 1643bf215546Sopenharmony_ci zmax = 1.0f; 1644bf215546Sopenharmony_ci } else { 1645bf215546Sopenharmony_ci zmin = MIN2(viewport->viewports[i].minDepth, viewport->viewports[i].maxDepth); 1646bf215546Sopenharmony_ci zmax = MAX2(viewport->viewports[i].minDepth, viewport->viewports[i].maxDepth); 1647bf215546Sopenharmony_ci } 1648bf215546Sopenharmony_ci 1649bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(zmin)); 1650bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(zmax)); 1651bf215546Sopenharmony_ci } 1652bf215546Sopenharmony_ci} 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_civoid 1655bf215546Sopenharmony_ciradv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs) 1656bf215546Sopenharmony_ci{ 1657bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 1658bf215546Sopenharmony_ci uint32_t count = cmd_buffer->state.dynamic.scissor.count; 1659bf215546Sopenharmony_ci unsigned rast_prim; 1660bf215546Sopenharmony_ci 1661bf215546Sopenharmony_ci if (!(pipeline->dynamic_states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) || 1662bf215546Sopenharmony_ci (pipeline->active_stages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | 1663bf215546Sopenharmony_ci VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT | 1664bf215546Sopenharmony_ci VK_SHADER_STAGE_GEOMETRY_BIT | 1665bf215546Sopenharmony_ci 
VK_SHADER_STAGE_MESH_BIT_NV))) { 1666bf215546Sopenharmony_ci /* Ignore dynamic primitive topology for TES/GS/MS stages. */ 1667bf215546Sopenharmony_ci rast_prim = pipeline->rast_prim; 1668bf215546Sopenharmony_ci } else { 1669bf215546Sopenharmony_ci rast_prim = si_conv_prim_to_gs_out(cmd_buffer->state.dynamic.primitive_topology); 1670bf215546Sopenharmony_ci } 1671bf215546Sopenharmony_ci 1672bf215546Sopenharmony_ci si_write_scissors(cs, 0, count, cmd_buffer->state.dynamic.scissor.scissors, 1673bf215546Sopenharmony_ci cmd_buffer->state.dynamic.viewport.viewports, rast_prim, 1674bf215546Sopenharmony_ci cmd_buffer->state.dynamic.line_width); 1675bf215546Sopenharmony_ci} 1676bf215546Sopenharmony_ci 1677bf215546Sopenharmony_cistatic void 1678bf215546Sopenharmony_ciradv_emit_scissor(struct radv_cmd_buffer *cmd_buffer) 1679bf215546Sopenharmony_ci{ 1680bf215546Sopenharmony_ci radv_write_scissors(cmd_buffer, cmd_buffer->cs); 1681bf215546Sopenharmony_ci 1682bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = false; 1683bf215546Sopenharmony_ci} 1684bf215546Sopenharmony_ci 1685bf215546Sopenharmony_cistatic void 1686bf215546Sopenharmony_ciradv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer) 1687bf215546Sopenharmony_ci{ 1688bf215546Sopenharmony_ci if (!cmd_buffer->state.dynamic.discard_rectangle.count) 1689bf215546Sopenharmony_ci return; 1690bf215546Sopenharmony_ci 1691bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL, 1692bf215546Sopenharmony_ci cmd_buffer->state.dynamic.discard_rectangle.count * 2); 1693bf215546Sopenharmony_ci for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) { 1694bf215546Sopenharmony_ci VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i]; 1695bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y)); 1696bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 
S_028214_BR_X(rect.offset.x + rect.extent.width) | 1697bf215546Sopenharmony_ci S_028214_BR_Y(rect.offset.y + rect.extent.height)); 1698bf215546Sopenharmony_ci } 1699bf215546Sopenharmony_ci} 1700bf215546Sopenharmony_ci 1701bf215546Sopenharmony_cistatic void 1702bf215546Sopenharmony_ciradv_emit_line_width(struct radv_cmd_buffer *cmd_buffer) 1703bf215546Sopenharmony_ci{ 1704bf215546Sopenharmony_ci unsigned width = cmd_buffer->state.dynamic.line_width * 8; 1705bf215546Sopenharmony_ci 1706bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL, 1707bf215546Sopenharmony_ci S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF))); 1708bf215546Sopenharmony_ci} 1709bf215546Sopenharmony_ci 1710bf215546Sopenharmony_cistatic void 1711bf215546Sopenharmony_ciradv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer) 1712bf215546Sopenharmony_ci{ 1713bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1714bf215546Sopenharmony_ci 1715bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4); 1716bf215546Sopenharmony_ci radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4); 1717bf215546Sopenharmony_ci} 1718bf215546Sopenharmony_ci 1719bf215546Sopenharmony_cistatic void 1720bf215546Sopenharmony_ciradv_emit_stencil(struct radv_cmd_buffer *cmd_buffer) 1721bf215546Sopenharmony_ci{ 1722bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1723bf215546Sopenharmony_ci 1724bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028430_DB_STENCILREFMASK, 2); 1725bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_028430_STENCILTESTVAL(d->stencil_reference.front) | 1726bf215546Sopenharmony_ci S_028430_STENCILMASK(d->stencil_compare_mask.front) | 1727bf215546Sopenharmony_ci S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) | 1728bf215546Sopenharmony_ci S_028430_STENCILOPVAL(1)); 1729bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 
S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) | 1730bf215546Sopenharmony_ci S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) | 1731bf215546Sopenharmony_ci S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) | 1732bf215546Sopenharmony_ci S_028434_STENCILOPVAL_BF(1)); 1733bf215546Sopenharmony_ci} 1734bf215546Sopenharmony_ci 1735bf215546Sopenharmony_cistatic void 1736bf215546Sopenharmony_ciradv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer) 1737bf215546Sopenharmony_ci{ 1738bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1739bf215546Sopenharmony_ci 1740bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, 2); 1741bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(d->depth_bounds.min)); 1742bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(d->depth_bounds.max)); 1743bf215546Sopenharmony_ci} 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_cistatic void 1746bf215546Sopenharmony_ciradv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer) 1747bf215546Sopenharmony_ci{ 1748bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1749bf215546Sopenharmony_ci unsigned slope = fui(d->depth_bias.slope * 16.0f); 1750bf215546Sopenharmony_ci 1751bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5); 1752bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */ 1753bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, slope); /* FRONT SCALE */ 1754bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* FRONT OFFSET */ 1755bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, slope); /* BACK SCALE */ 1756bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias)); /* BACK OFFSET */ 1757bf215546Sopenharmony_ci} 1758bf215546Sopenharmony_ci 1759bf215546Sopenharmony_cistatic void 
1760bf215546Sopenharmony_ciradv_emit_line_stipple(struct radv_cmd_buffer *cmd_buffer) 1761bf215546Sopenharmony_ci{ 1762bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1763bf215546Sopenharmony_ci uint32_t auto_reset_cntl = 1; 1764bf215546Sopenharmony_ci 1765bf215546Sopenharmony_ci if (d->primitive_topology == V_008958_DI_PT_LINESTRIP) 1766bf215546Sopenharmony_ci auto_reset_cntl = 2; 1767bf215546Sopenharmony_ci 1768bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE, 1769bf215546Sopenharmony_ci S_028A0C_LINE_PATTERN(d->line_stipple.pattern) | 1770bf215546Sopenharmony_ci S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) | 1771bf215546Sopenharmony_ci S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl)); 1772bf215546Sopenharmony_ci} 1773bf215546Sopenharmony_ci 1774bf215546Sopenharmony_ciuint32_t 1775bf215546Sopenharmony_ciradv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer) 1776bf215546Sopenharmony_ci{ 1777bf215546Sopenharmony_ci unsigned pa_su_sc_mode_cntl = cmd_buffer->state.graphics_pipeline->pa_su_sc_mode_cntl; 1778bf215546Sopenharmony_ci const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1779bf215546Sopenharmony_ci 1780bf215546Sopenharmony_ci pa_su_sc_mode_cntl &= C_028814_CULL_FRONT & 1781bf215546Sopenharmony_ci C_028814_CULL_BACK & 1782bf215546Sopenharmony_ci C_028814_FACE & 1783bf215546Sopenharmony_ci C_028814_POLY_OFFSET_FRONT_ENABLE & 1784bf215546Sopenharmony_ci C_028814_POLY_OFFSET_BACK_ENABLE & 1785bf215546Sopenharmony_ci C_028814_POLY_OFFSET_PARA_ENABLE; 1786bf215546Sopenharmony_ci 1787bf215546Sopenharmony_ci pa_su_sc_mode_cntl |= S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT)) | 1788bf215546Sopenharmony_ci S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT)) | 1789bf215546Sopenharmony_ci S_028814_FACE(d->front_face) | 1790bf215546Sopenharmony_ci S_028814_POLY_OFFSET_FRONT_ENABLE(d->depth_bias_enable) | 1791bf215546Sopenharmony_ci 
S_028814_POLY_OFFSET_BACK_ENABLE(d->depth_bias_enable) | 1792bf215546Sopenharmony_ci S_028814_POLY_OFFSET_PARA_ENABLE(d->depth_bias_enable); 1793bf215546Sopenharmony_ci return pa_su_sc_mode_cntl; 1794bf215546Sopenharmony_ci} 1795bf215546Sopenharmony_ci 1796bf215546Sopenharmony_cistatic void 1797bf215546Sopenharmony_ciradv_emit_culling(struct radv_cmd_buffer *cmd_buffer, uint64_t states) 1798bf215546Sopenharmony_ci{ 1799bf215546Sopenharmony_ci unsigned pa_su_sc_mode_cntl = radv_get_pa_su_sc_mode_cntl(cmd_buffer); 1800bf215546Sopenharmony_ci 1801bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl); 1802bf215546Sopenharmony_ci} 1803bf215546Sopenharmony_ci 1804bf215546Sopenharmony_cistatic void 1805bf215546Sopenharmony_ciradv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer) 1806bf215546Sopenharmony_ci{ 1807bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1808bf215546Sopenharmony_ci 1809bf215546Sopenharmony_ci assert(!cmd_buffer->state.mesh_shading); 1810bf215546Sopenharmony_ci 1811bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { 1812bf215546Sopenharmony_ci radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs, 1813bf215546Sopenharmony_ci R_030908_VGT_PRIMITIVE_TYPE, 1, d->primitive_topology); 1814bf215546Sopenharmony_ci } else { 1815bf215546Sopenharmony_ci radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->primitive_topology); 1816bf215546Sopenharmony_ci } 1817bf215546Sopenharmony_ci} 1818bf215546Sopenharmony_ci 1819bf215546Sopenharmony_cistatic void 1820bf215546Sopenharmony_ciradv_emit_depth_control(struct radv_cmd_buffer *cmd_buffer, uint64_t states) 1821bf215546Sopenharmony_ci{ 1822bf215546Sopenharmony_ci unsigned db_depth_control = cmd_buffer->state.graphics_pipeline->db_depth_control; 1823bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 
1824bf215546Sopenharmony_ci 1825bf215546Sopenharmony_ci db_depth_control &= C_028800_Z_ENABLE & 1826bf215546Sopenharmony_ci C_028800_Z_WRITE_ENABLE & 1827bf215546Sopenharmony_ci C_028800_ZFUNC & 1828bf215546Sopenharmony_ci C_028800_DEPTH_BOUNDS_ENABLE & 1829bf215546Sopenharmony_ci C_028800_STENCIL_ENABLE & 1830bf215546Sopenharmony_ci C_028800_BACKFACE_ENABLE & 1831bf215546Sopenharmony_ci C_028800_STENCILFUNC & 1832bf215546Sopenharmony_ci C_028800_STENCILFUNC_BF; 1833bf215546Sopenharmony_ci 1834bf215546Sopenharmony_ci db_depth_control |= S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0) | 1835bf215546Sopenharmony_ci S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0) | 1836bf215546Sopenharmony_ci S_028800_ZFUNC(d->depth_compare_op) | 1837bf215546Sopenharmony_ci S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0) | 1838bf215546Sopenharmony_ci S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0) | 1839bf215546Sopenharmony_ci S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 
1 : 0) | 1840bf215546Sopenharmony_ci S_028800_STENCILFUNC(d->stencil_op.front.compare_op) | 1841bf215546Sopenharmony_ci S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op); 1842bf215546Sopenharmony_ci 1843bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1844bf215546Sopenharmony_ci} 1845bf215546Sopenharmony_ci 1846bf215546Sopenharmony_cistatic void 1847bf215546Sopenharmony_ciradv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer) 1848bf215546Sopenharmony_ci{ 1849bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1850bf215546Sopenharmony_ci 1851bf215546Sopenharmony_ci radeon_set_context_reg( 1852bf215546Sopenharmony_ci cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL, 1853bf215546Sopenharmony_ci S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) | 1854bf215546Sopenharmony_ci S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) | 1855bf215546Sopenharmony_ci S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) | 1856bf215546Sopenharmony_ci S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) | 1857bf215546Sopenharmony_ci S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) | 1858bf215546Sopenharmony_ci S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op))); 1859bf215546Sopenharmony_ci} 1860bf215546Sopenharmony_ci 1861bf215546Sopenharmony_cistatic void 1862bf215546Sopenharmony_ciradv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer) 1863bf215546Sopenharmony_ci{ 1864bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 1865bf215546Sopenharmony_ci const struct radv_subpass *subpass = cmd_buffer->state.subpass; 1866bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1867bf215546Sopenharmony_ci uint32_t rate_x = MIN2(2, 
d->fragment_shading_rate.size.width) - 1; 1868bf215546Sopenharmony_ci uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1; 1869bf215546Sopenharmony_ci uint32_t pa_cl_vrs_cntl = pipeline->vrs.pa_cl_vrs_cntl; 1870bf215546Sopenharmony_ci uint32_t pipeline_comb_mode = d->fragment_shading_rate.combiner_ops[0]; 1871bf215546Sopenharmony_ci uint32_t htile_comb_mode = d->fragment_shading_rate.combiner_ops[1]; 1872bf215546Sopenharmony_ci 1873bf215546Sopenharmony_ci assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3); 1874bf215546Sopenharmony_ci 1875bf215546Sopenharmony_ci if (subpass && !subpass->vrs_attachment) { 1876bf215546Sopenharmony_ci /* When the current subpass has no VRS attachment, the VRS rates are expected to be 1x1, so we 1877bf215546Sopenharmony_ci * can cheat by tweaking the different combiner modes. 1878bf215546Sopenharmony_ci */ 1879bf215546Sopenharmony_ci switch (htile_comb_mode) { 1880bf215546Sopenharmony_ci case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR: 1881bf215546Sopenharmony_ci /* The result of min(A, 1x1) is always 1x1. */ 1882bf215546Sopenharmony_ci FALLTHROUGH; 1883bf215546Sopenharmony_ci case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR: 1884bf215546Sopenharmony_ci /* Force the per-draw VRS rate to 1x1. */ 1885bf215546Sopenharmony_ci rate_x = rate_y = 0; 1886bf215546Sopenharmony_ci 1887bf215546Sopenharmony_ci /* As the result of min(A, 1x1) or replace(A, 1x1) are always 1x1, set the vertex rate 1888bf215546Sopenharmony_ci * combiner mode as passthrough. 1889bf215546Sopenharmony_ci */ 1890bf215546Sopenharmony_ci pipeline_comb_mode = V_028848_VRS_COMB_MODE_PASSTHRU; 1891bf215546Sopenharmony_ci break; 1892bf215546Sopenharmony_ci case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR: 1893bf215546Sopenharmony_ci /* The result of max(A, 1x1) is always A. 
*/ 1894bf215546Sopenharmony_ci FALLTHROUGH; 1895bf215546Sopenharmony_ci case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR: 1896bf215546Sopenharmony_ci /* Nothing to do here because the SAMPLE_ITER combiner mode should already be passthrough. */ 1897bf215546Sopenharmony_ci break; 1898bf215546Sopenharmony_ci default: 1899bf215546Sopenharmony_ci break; 1900bf215546Sopenharmony_ci } 1901bf215546Sopenharmony_ci } 1902bf215546Sopenharmony_ci 1903bf215546Sopenharmony_ci /* Emit per-draw VRS rate which is the first combiner. */ 1904bf215546Sopenharmony_ci radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE, 1905bf215546Sopenharmony_ci S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y)); 1906bf215546Sopenharmony_ci 1907bf215546Sopenharmony_ci /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the 1908bf215546Sopenharmony_ci * draw rate and the vertex rate. 1909bf215546Sopenharmony_ci */ 1910bf215546Sopenharmony_ci if (cmd_buffer->state.mesh_shading) { 1911bf215546Sopenharmony_ci pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU) | 1912bf215546Sopenharmony_ci S_028848_PRIMITIVE_RATE_COMBINER_MODE(pipeline_comb_mode); 1913bf215546Sopenharmony_ci } else { 1914bf215546Sopenharmony_ci pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(pipeline_comb_mode) | 1915bf215546Sopenharmony_ci S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU); 1916bf215546Sopenharmony_ci } 1917bf215546Sopenharmony_ci 1918bf215546Sopenharmony_ci /* HTILE_RATE_COMBINER_MODE controls the combiner mode between the primitive rate and the HTILE 1919bf215546Sopenharmony_ci * rate. 
1920bf215546Sopenharmony_ci */ 1921bf215546Sopenharmony_ci pa_cl_vrs_cntl |= S_028848_HTILE_RATE_COMBINER_MODE(htile_comb_mode); 1922bf215546Sopenharmony_ci 1923bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl); 1924bf215546Sopenharmony_ci} 1925bf215546Sopenharmony_ci 1926bf215546Sopenharmony_cistatic void 1927bf215546Sopenharmony_ciradv_emit_primitive_restart_enable(struct radv_cmd_buffer *cmd_buffer) 1928bf215546Sopenharmony_ci{ 1929bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1930bf215546Sopenharmony_ci 1931bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { 1932bf215546Sopenharmony_ci radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_GE_MULTI_PRIM_IB_RESET_EN, 1933bf215546Sopenharmony_ci d->primitive_restart_enable); 1934bf215546Sopenharmony_ci } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { 1935bf215546Sopenharmony_ci radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN, 1936bf215546Sopenharmony_ci d->primitive_restart_enable); 1937bf215546Sopenharmony_ci } else { 1938bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN, 1939bf215546Sopenharmony_ci d->primitive_restart_enable); 1940bf215546Sopenharmony_ci } 1941bf215546Sopenharmony_ci} 1942bf215546Sopenharmony_ci 1943bf215546Sopenharmony_cistatic void 1944bf215546Sopenharmony_ciradv_emit_rasterizer_discard_enable(struct radv_cmd_buffer *cmd_buffer) 1945bf215546Sopenharmony_ci{ 1946bf215546Sopenharmony_ci unsigned pa_cl_clip_cntl = cmd_buffer->state.graphics_pipeline->pa_cl_clip_cntl; 1947bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1948bf215546Sopenharmony_ci 1949bf215546Sopenharmony_ci pa_cl_clip_cntl &= C_028810_DX_RASTERIZATION_KILL; 1950bf215546Sopenharmony_ci pa_cl_clip_cntl |= S_028810_DX_RASTERIZATION_KILL(d->rasterizer_discard_enable); 
1951bf215546Sopenharmony_ci 1952bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL, pa_cl_clip_cntl); 1953bf215546Sopenharmony_ci} 1954bf215546Sopenharmony_ci 1955bf215546Sopenharmony_cistatic void 1956bf215546Sopenharmony_ciradv_emit_logic_op(struct radv_cmd_buffer *cmd_buffer) 1957bf215546Sopenharmony_ci{ 1958bf215546Sopenharmony_ci unsigned cb_color_control = cmd_buffer->state.graphics_pipeline->cb_color_control; 1959bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1960bf215546Sopenharmony_ci 1961bf215546Sopenharmony_ci cb_color_control &= C_028808_ROP3; 1962bf215546Sopenharmony_ci cb_color_control |= S_028808_ROP3(d->logic_op); 1963bf215546Sopenharmony_ci 1964bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028808_CB_COLOR_CONTROL, cb_color_control); 1965bf215546Sopenharmony_ci} 1966bf215546Sopenharmony_ci 1967bf215546Sopenharmony_cistatic void 1968bf215546Sopenharmony_ciradv_emit_color_write_enable(struct radv_cmd_buffer *cmd_buffer) 1969bf215546Sopenharmony_ci{ 1970bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 1971bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 1972bf215546Sopenharmony_ci 1973bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK, 1974bf215546Sopenharmony_ci pipeline->cb_target_mask & d->color_write_enable); 1975bf215546Sopenharmony_ci} 1976bf215546Sopenharmony_ci 1977bf215546Sopenharmony_cistatic void 1978bf215546Sopenharmony_ciradv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index, 1979bf215546Sopenharmony_ci struct radv_color_buffer_info *cb, struct radv_image_view *iview, 1980bf215546Sopenharmony_ci VkImageLayout layout, bool in_render_loop) 1981bf215546Sopenharmony_ci{ 1982bf215546Sopenharmony_ci bool is_vi = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8; 1983bf215546Sopenharmony_ci uint32_t 
cb_fdcc_control = cb->cb_dcc_control; 1984bf215546Sopenharmony_ci uint32_t cb_color_info = cb->cb_color_info; 1985bf215546Sopenharmony_ci struct radv_image *image = iview->image; 1986bf215546Sopenharmony_ci 1987bf215546Sopenharmony_ci if (!radv_layout_dcc_compressed( 1988bf215546Sopenharmony_ci cmd_buffer->device, image, iview->vk.base_mip_level, layout, in_render_loop, 1989bf215546Sopenharmony_ci radv_image_queue_family_mask(image, cmd_buffer->qf, 1990bf215546Sopenharmony_ci cmd_buffer->qf))) { 1991bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { 1992bf215546Sopenharmony_ci cb_fdcc_control &= C_028C78_FDCC_ENABLE; 1993bf215546Sopenharmony_ci } else { 1994bf215546Sopenharmony_ci cb_color_info &= C_028C70_DCC_ENABLE; 1995bf215546Sopenharmony_ci } 1996bf215546Sopenharmony_ci } 1997bf215546Sopenharmony_ci 1998bf215546Sopenharmony_ci if (!radv_layout_fmask_compressed( 1999bf215546Sopenharmony_ci cmd_buffer->device, image, layout, 2000bf215546Sopenharmony_ci radv_image_queue_family_mask(image, cmd_buffer->qf, 2001bf215546Sopenharmony_ci cmd_buffer->qf))) { 2002bf215546Sopenharmony_ci cb_color_info &= C_028C70_COMPRESSION; 2003bf215546Sopenharmony_ci } 2004bf215546Sopenharmony_ci 2005bf215546Sopenharmony_ci if (radv_image_is_tc_compat_cmask(image) && (radv_is_fmask_decompress_pipeline(cmd_buffer) || 2006bf215546Sopenharmony_ci radv_is_dcc_decompress_pipeline(cmd_buffer))) { 2007bf215546Sopenharmony_ci /* If this bit is set, the FMASK decompression operation 2008bf215546Sopenharmony_ci * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS). 
2009bf215546Sopenharmony_ci */ 2010bf215546Sopenharmony_ci cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY; 2011bf215546Sopenharmony_ci } 2012bf215546Sopenharmony_ci 2013bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { 2014bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028C6C_CB_COLOR0_VIEW + index * 0x3c, 4); 2015bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_view); /* CB_COLOR0_VIEW */ 2016bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_info); /* CB_COLOR0_INFO */ 2017bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); /* CB_COLOR0_ATTRIB */ 2018bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */ 2019bf215546Sopenharmony_ci 2020bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, cb->cb_color_base); 2021bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, cb->cb_color_base >> 32); 2022bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); 2023bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, cb->cb_dcc_base >> 32); 2024bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, cb->cb_color_attrib2); 2025bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, cb->cb_color_attrib3); 2026bf215546Sopenharmony_ci } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { 2027bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); 2028bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_base); 2029bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0); 2030bf215546Sopenharmony_ci 
radeon_emit(cmd_buffer->cs, 0); 2031bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_view); 2032bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb_color_info); 2033bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); 2034bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); 2035bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); 2036bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0); 2037bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); 2038bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0); 2039bf215546Sopenharmony_ci 2040bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base); 2041bf215546Sopenharmony_ci 2042bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, 2043bf215546Sopenharmony_ci cb->cb_color_base >> 32); 2044bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4, 2045bf215546Sopenharmony_ci cb->cb_color_cmask >> 32); 2046bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4, 2047bf215546Sopenharmony_ci cb->cb_color_fmask >> 32); 2048bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, 2049bf215546Sopenharmony_ci cb->cb_dcc_base >> 32); 2050bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, 2051bf215546Sopenharmony_ci cb->cb_color_attrib2); 2052bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, 2053bf215546Sopenharmony_ci cb->cb_color_attrib3); 2054bf215546Sopenharmony_ci } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { 2055bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); 
2056bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_base); 2057bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32)); 2058bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2); 2059bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_view); 2060bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb_color_info); 2061bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); 2062bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); 2063bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); 2064bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32)); 2065bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); 2066bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32)); 2067bf215546Sopenharmony_ci 2068bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2); 2069bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_dcc_base); 2070bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32)); 2071bf215546Sopenharmony_ci 2072bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4, 2073bf215546Sopenharmony_ci cb->cb_mrt_epitch); 2074bf215546Sopenharmony_ci } else { 2075bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11); 2076bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_base); 2077bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_pitch); 2078bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_slice); 2079bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_view); 2080bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb_color_info); 2081bf215546Sopenharmony_ci 
radeon_emit(cmd_buffer->cs, cb->cb_color_attrib); 2082bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_dcc_control); 2083bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_cmask); 2084bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice); 2085bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_fmask); 2086bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice); 2087bf215546Sopenharmony_ci 2088bf215546Sopenharmony_ci if (is_vi) { /* DCC BASE */ 2089bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2090bf215546Sopenharmony_ci cb->cb_dcc_base); 2091bf215546Sopenharmony_ci } 2092bf215546Sopenharmony_ci } 2093bf215546Sopenharmony_ci 2094bf215546Sopenharmony_ci if (G_028C70_DCC_ENABLE(cb_color_info)) { 2095bf215546Sopenharmony_ci /* Drawing with DCC enabled also compresses colorbuffers. */ 2096bf215546Sopenharmony_ci VkImageSubresourceRange range = { 2097bf215546Sopenharmony_ci .aspectMask = iview->vk.aspects, 2098bf215546Sopenharmony_ci .baseMipLevel = iview->vk.base_mip_level, 2099bf215546Sopenharmony_ci .levelCount = iview->vk.level_count, 2100bf215546Sopenharmony_ci .baseArrayLayer = iview->vk.base_array_layer, 2101bf215546Sopenharmony_ci .layerCount = iview->vk.layer_count, 2102bf215546Sopenharmony_ci }; 2103bf215546Sopenharmony_ci 2104bf215546Sopenharmony_ci radv_update_dcc_metadata(cmd_buffer, image, &range, true); 2105bf215546Sopenharmony_ci } 2106bf215546Sopenharmony_ci} 2107bf215546Sopenharmony_ci 2108bf215546Sopenharmony_cistatic void 2109bf215546Sopenharmony_ciradv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, 2110bf215546Sopenharmony_ci const struct radv_image_view *iview, VkImageLayout layout, 2111bf215546Sopenharmony_ci bool in_render_loop, bool requires_cond_exec) 2112bf215546Sopenharmony_ci{ 2113bf215546Sopenharmony_ci const struct radv_image *image = iview->image; 
2114bf215546Sopenharmony_ci uint32_t db_z_info = ds->db_z_info; 2115bf215546Sopenharmony_ci uint32_t db_z_info_reg; 2116bf215546Sopenharmony_ci 2117bf215546Sopenharmony_ci if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug || 2118bf215546Sopenharmony_ci !radv_image_is_tc_compat_htile(image)) 2119bf215546Sopenharmony_ci return; 2120bf215546Sopenharmony_ci 2121bf215546Sopenharmony_ci if (!radv_layout_is_htile_compressed( 2122bf215546Sopenharmony_ci cmd_buffer->device, image, layout, in_render_loop, 2123bf215546Sopenharmony_ci radv_image_queue_family_mask(image, cmd_buffer->qf, 2124bf215546Sopenharmony_ci cmd_buffer->qf))) { 2125bf215546Sopenharmony_ci db_z_info &= C_028040_TILE_SURFACE_ENABLE; 2126bf215546Sopenharmony_ci } 2127bf215546Sopenharmony_ci 2128bf215546Sopenharmony_ci db_z_info &= C_028040_ZRANGE_PRECISION; 2129bf215546Sopenharmony_ci 2130bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { 2131bf215546Sopenharmony_ci db_z_info_reg = R_028038_DB_Z_INFO; 2132bf215546Sopenharmony_ci } else { 2133bf215546Sopenharmony_ci db_z_info_reg = R_028040_DB_Z_INFO; 2134bf215546Sopenharmony_ci } 2135bf215546Sopenharmony_ci 2136bf215546Sopenharmony_ci /* When we don't know the last fast clear value we need to emit a 2137bf215546Sopenharmony_ci * conditional packet that will eventually skip the following 2138bf215546Sopenharmony_ci * SET_CONTEXT_REG packet. 
2139bf215546Sopenharmony_ci */ 2140bf215546Sopenharmony_ci if (requires_cond_exec) { 2141bf215546Sopenharmony_ci uint64_t va = radv_get_tc_compat_zrange_va(image, iview->vk.base_mip_level); 2142bf215546Sopenharmony_ci 2143bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0)); 2144bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va); 2145bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va >> 32); 2146bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0); 2147bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */ 2148bf215546Sopenharmony_ci } 2149bf215546Sopenharmony_ci 2150bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info); 2151bf215546Sopenharmony_ci} 2152bf215546Sopenharmony_ci 2153bf215546Sopenharmony_cistatic void 2154bf215546Sopenharmony_ciradv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds, 2155bf215546Sopenharmony_ci struct radv_image_view *iview, VkImageLayout layout, bool in_render_loop) 2156bf215546Sopenharmony_ci{ 2157bf215546Sopenharmony_ci const struct radv_image *image = iview->image; 2158bf215546Sopenharmony_ci uint32_t db_z_info = ds->db_z_info; 2159bf215546Sopenharmony_ci uint32_t db_stencil_info = ds->db_stencil_info; 2160bf215546Sopenharmony_ci uint32_t db_htile_surface = ds->db_htile_surface; 2161bf215546Sopenharmony_ci 2162bf215546Sopenharmony_ci if (!radv_layout_is_htile_compressed( 2163bf215546Sopenharmony_ci cmd_buffer->device, image, layout, in_render_loop, 2164bf215546Sopenharmony_ci radv_image_queue_family_mask(image, cmd_buffer->qf, 2165bf215546Sopenharmony_ci cmd_buffer->qf))) { 2166bf215546Sopenharmony_ci db_z_info &= C_028040_TILE_SURFACE_ENABLE; 2167bf215546Sopenharmony_ci db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1); 2168bf215546Sopenharmony_ci } 2169bf215546Sopenharmony_ci 2170bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3 && 
2171bf215546Sopenharmony_ci !cmd_buffer->state.subpass->vrs_attachment) { 2172bf215546Sopenharmony_ci db_htile_surface &= C_028ABC_VRS_HTILE_ENCODING; 2173bf215546Sopenharmony_ci } 2174bf215546Sopenharmony_ci 2175bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view); 2176bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, db_htile_surface); 2177bf215546Sopenharmony_ci 2178bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { 2179bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); 2180bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size); 2181bf215546Sopenharmony_ci 2182bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { 2183bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 6); 2184bf215546Sopenharmony_ci } else { 2185bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7); 2186bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1)); 2187bf215546Sopenharmony_ci } 2188bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, db_z_info); 2189bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, db_stencil_info); 2190bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_z_read_base); 2191bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); 2192bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_z_read_base); 2193bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); 2194bf215546Sopenharmony_ci 2195bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5); 2196bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); 2197bf215546Sopenharmony_ci 
radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); 2198bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32); 2199bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32); 2200bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32); 2201bf215546Sopenharmony_ci } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { 2202bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3); 2203bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_htile_data_base); 2204bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32)); 2205bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_depth_size); 2206bf215546Sopenharmony_ci 2207bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10); 2208bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, db_z_info); /* DB_Z_INFO */ 2209bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, db_stencil_info); /* DB_STENCIL_INFO */ 2210bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */ 2211bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 2212bf215546Sopenharmony_ci S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */ 2213bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* DB_STENCIL_READ_BASE */ 2214bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 2215bf215546Sopenharmony_ci S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ 2216bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* DB_Z_WRITE_BASE */ 2217bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 2218bf215546Sopenharmony_ci S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */ 2219bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* DB_STENCIL_WRITE_BASE */ 
2220bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 2221bf215546Sopenharmony_ci S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ 2222bf215546Sopenharmony_ci 2223bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2); 2224bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_z_info2); 2225bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_stencil_info2); 2226bf215546Sopenharmony_ci } else { 2227bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base); 2228bf215546Sopenharmony_ci 2229bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9); 2230bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_depth_info); /* R_02803C_DB_DEPTH_INFO */ 2231bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, db_z_info); /* R_028040_DB_Z_INFO */ 2232bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, db_stencil_info); /* R_028044_DB_STENCIL_INFO */ 2233bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* R_028048_DB_Z_READ_BASE */ 2234bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base); /* R_02804C_DB_STENCIL_READ_BASE */ 2235bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_z_write_base); /* R_028050_DB_Z_WRITE_BASE */ 2236bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */ 2237bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_depth_size); /* R_028058_DB_DEPTH_SIZE */ 2238bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ds->db_depth_slice); /* R_02805C_DB_DEPTH_SLICE */ 2239bf215546Sopenharmony_ci } 2240bf215546Sopenharmony_ci 2241bf215546Sopenharmony_ci /* Update the ZRANGE_PRECISION value for the TC-compat bug. 
*/ 2242bf215546Sopenharmony_ci radv_update_zrange_precision(cmd_buffer, ds, iview, layout, in_render_loop, true); 2243bf215546Sopenharmony_ci 2244bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, 2245bf215546Sopenharmony_ci ds->pa_su_poly_offset_db_fmt_cntl); 2246bf215546Sopenharmony_ci} 2247bf215546Sopenharmony_ci 2248bf215546Sopenharmony_ci/** 2249bf215546Sopenharmony_ci * Update the fast clear depth/stencil values if the image is bound as a 2250bf215546Sopenharmony_ci * depth/stencil buffer. 2251bf215546Sopenharmony_ci */ 2252bf215546Sopenharmony_cistatic void 2253bf215546Sopenharmony_ciradv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer, 2254bf215546Sopenharmony_ci const struct radv_image_view *iview, 2255bf215546Sopenharmony_ci VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) 2256bf215546Sopenharmony_ci{ 2257bf215546Sopenharmony_ci const struct radv_subpass *subpass = cmd_buffer->state.subpass; 2258bf215546Sopenharmony_ci const struct radv_image *image = iview->image; 2259bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 2260bf215546Sopenharmony_ci uint32_t att_idx; 2261bf215546Sopenharmony_ci 2262bf215546Sopenharmony_ci if (!cmd_buffer->state.attachments || !subpass) 2263bf215546Sopenharmony_ci return; 2264bf215546Sopenharmony_ci 2265bf215546Sopenharmony_ci if (!subpass->depth_stencil_attachment) 2266bf215546Sopenharmony_ci return; 2267bf215546Sopenharmony_ci 2268bf215546Sopenharmony_ci att_idx = subpass->depth_stencil_attachment->attachment; 2269bf215546Sopenharmony_ci if (cmd_buffer->state.attachments[att_idx].iview->image != image) 2270bf215546Sopenharmony_ci return; 2271bf215546Sopenharmony_ci 2272bf215546Sopenharmony_ci if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { 2273bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2); 2274bf215546Sopenharmony_ci radeon_emit(cs, ds_clear_value.stencil); 
2275bf215546Sopenharmony_ci radeon_emit(cs, fui(ds_clear_value.depth)); 2276bf215546Sopenharmony_ci } else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { 2277bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(ds_clear_value.depth)); 2278bf215546Sopenharmony_ci } else { 2279bf215546Sopenharmony_ci assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); 2280bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028028_DB_STENCIL_CLEAR, ds_clear_value.stencil); 2281bf215546Sopenharmony_ci } 2282bf215546Sopenharmony_ci 2283bf215546Sopenharmony_ci /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is 2284bf215546Sopenharmony_ci * only needed when clearing Z to 0.0. 2285bf215546Sopenharmony_ci */ 2286bf215546Sopenharmony_ci if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && ds_clear_value.depth == 0.0) { 2287bf215546Sopenharmony_ci VkImageLayout layout = subpass->depth_stencil_attachment->layout; 2288bf215546Sopenharmony_ci bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop; 2289bf215546Sopenharmony_ci 2290bf215546Sopenharmony_ci radv_update_zrange_precision(cmd_buffer, &cmd_buffer->state.attachments[att_idx].ds, iview, 2291bf215546Sopenharmony_ci layout, in_render_loop, false); 2292bf215546Sopenharmony_ci } 2293bf215546Sopenharmony_ci 2294bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 2295bf215546Sopenharmony_ci} 2296bf215546Sopenharmony_ci 2297bf215546Sopenharmony_ci/** 2298bf215546Sopenharmony_ci * Set the clear depth/stencil values to the image's metadata. 
2299bf215546Sopenharmony_ci */ 2300bf215546Sopenharmony_cistatic void 2301bf215546Sopenharmony_ciradv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2302bf215546Sopenharmony_ci const VkImageSubresourceRange *range, 2303bf215546Sopenharmony_ci VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) 2304bf215546Sopenharmony_ci{ 2305bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 2306bf215546Sopenharmony_ci uint32_t level_count = radv_get_levelCount(image, range); 2307bf215546Sopenharmony_ci 2308bf215546Sopenharmony_ci if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { 2309bf215546Sopenharmony_ci uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel); 2310bf215546Sopenharmony_ci 2311bf215546Sopenharmony_ci /* Use the fastest way when both aspects are used. */ 2312bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating)); 2313bf215546Sopenharmony_ci radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); 2314bf215546Sopenharmony_ci radeon_emit(cs, va); 2315bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 2316bf215546Sopenharmony_ci 2317bf215546Sopenharmony_ci for (uint32_t l = 0; l < level_count; l++) { 2318bf215546Sopenharmony_ci radeon_emit(cs, ds_clear_value.stencil); 2319bf215546Sopenharmony_ci radeon_emit(cs, fui(ds_clear_value.depth)); 2320bf215546Sopenharmony_ci } 2321bf215546Sopenharmony_ci } else { 2322bf215546Sopenharmony_ci /* Otherwise we need one WRITE_DATA packet per level. 
*/ 2323bf215546Sopenharmony_ci for (uint32_t l = 0; l < level_count; l++) { 2324bf215546Sopenharmony_ci uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l); 2325bf215546Sopenharmony_ci unsigned value; 2326bf215546Sopenharmony_ci 2327bf215546Sopenharmony_ci if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) { 2328bf215546Sopenharmony_ci value = fui(ds_clear_value.depth); 2329bf215546Sopenharmony_ci va += 4; 2330bf215546Sopenharmony_ci } else { 2331bf215546Sopenharmony_ci assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT); 2332bf215546Sopenharmony_ci value = ds_clear_value.stencil; 2333bf215546Sopenharmony_ci } 2334bf215546Sopenharmony_ci 2335bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating)); 2336bf215546Sopenharmony_ci radeon_emit(cs, 2337bf215546Sopenharmony_ci S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); 2338bf215546Sopenharmony_ci radeon_emit(cs, va); 2339bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 2340bf215546Sopenharmony_ci radeon_emit(cs, value); 2341bf215546Sopenharmony_ci } 2342bf215546Sopenharmony_ci } 2343bf215546Sopenharmony_ci} 2344bf215546Sopenharmony_ci 2345bf215546Sopenharmony_ci/** 2346bf215546Sopenharmony_ci * Update the TC-compat metadata value for this image. 
2347bf215546Sopenharmony_ci */ 2348bf215546Sopenharmony_cistatic void 2349bf215546Sopenharmony_ciradv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2350bf215546Sopenharmony_ci const VkImageSubresourceRange *range, uint32_t value) 2351bf215546Sopenharmony_ci{ 2352bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 2353bf215546Sopenharmony_ci 2354bf215546Sopenharmony_ci if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug) 2355bf215546Sopenharmony_ci return; 2356bf215546Sopenharmony_ci 2357bf215546Sopenharmony_ci uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel); 2358bf215546Sopenharmony_ci uint32_t level_count = radv_get_levelCount(image, range); 2359bf215546Sopenharmony_ci 2360bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + level_count, cmd_buffer->state.predicating)); 2361bf215546Sopenharmony_ci radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); 2362bf215546Sopenharmony_ci radeon_emit(cs, va); 2363bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 2364bf215546Sopenharmony_ci 2365bf215546Sopenharmony_ci for (uint32_t l = 0; l < level_count; l++) 2366bf215546Sopenharmony_ci radeon_emit(cs, value); 2367bf215546Sopenharmony_ci} 2368bf215546Sopenharmony_ci 2369bf215546Sopenharmony_cistatic void 2370bf215546Sopenharmony_ciradv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, 2371bf215546Sopenharmony_ci const struct radv_image_view *iview, 2372bf215546Sopenharmony_ci VkClearDepthStencilValue ds_clear_value) 2373bf215546Sopenharmony_ci{ 2374bf215546Sopenharmony_ci VkImageSubresourceRange range = { 2375bf215546Sopenharmony_ci .aspectMask = iview->vk.aspects, 2376bf215546Sopenharmony_ci .baseMipLevel = iview->vk.base_mip_level, 2377bf215546Sopenharmony_ci .levelCount = iview->vk.level_count, 2378bf215546Sopenharmony_ci .baseArrayLayer = iview->vk.base_array_layer, 
2379bf215546Sopenharmony_ci .layerCount = iview->vk.layer_count, 2380bf215546Sopenharmony_ci }; 2381bf215546Sopenharmony_ci uint32_t cond_val; 2382bf215546Sopenharmony_ci 2383bf215546Sopenharmony_ci /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last 2384bf215546Sopenharmony_ci * depth clear value is 0.0f. 2385bf215546Sopenharmony_ci */ 2386bf215546Sopenharmony_ci cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0; 2387bf215546Sopenharmony_ci 2388bf215546Sopenharmony_ci radv_set_tc_compat_zrange_metadata(cmd_buffer, iview->image, &range, cond_val); 2389bf215546Sopenharmony_ci} 2390bf215546Sopenharmony_ci 2391bf215546Sopenharmony_ci/** 2392bf215546Sopenharmony_ci * Update the clear depth/stencil values for this image. 2393bf215546Sopenharmony_ci */ 2394bf215546Sopenharmony_civoid 2395bf215546Sopenharmony_ciradv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, 2396bf215546Sopenharmony_ci const struct radv_image_view *iview, 2397bf215546Sopenharmony_ci VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects) 2398bf215546Sopenharmony_ci{ 2399bf215546Sopenharmony_ci VkImageSubresourceRange range = { 2400bf215546Sopenharmony_ci .aspectMask = iview->vk.aspects, 2401bf215546Sopenharmony_ci .baseMipLevel = iview->vk.base_mip_level, 2402bf215546Sopenharmony_ci .levelCount = iview->vk.level_count, 2403bf215546Sopenharmony_ci .baseArrayLayer = iview->vk.base_array_layer, 2404bf215546Sopenharmony_ci .layerCount = iview->vk.layer_count, 2405bf215546Sopenharmony_ci }; 2406bf215546Sopenharmony_ci struct radv_image *image = iview->image; 2407bf215546Sopenharmony_ci 2408bf215546Sopenharmony_ci assert(radv_htile_enabled(image, range.baseMipLevel)); 2409bf215546Sopenharmony_ci 2410bf215546Sopenharmony_ci radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range, ds_clear_value, aspects); 2411bf215546Sopenharmony_ci 2412bf215546Sopenharmony_ci if (radv_image_is_tc_compat_htile(image) && (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) { 
2413bf215546Sopenharmony_ci radv_update_tc_compat_zrange_metadata(cmd_buffer, iview, ds_clear_value); 2414bf215546Sopenharmony_ci } 2415bf215546Sopenharmony_ci 2416bf215546Sopenharmony_ci radv_update_bound_fast_clear_ds(cmd_buffer, iview, ds_clear_value, aspects); 2417bf215546Sopenharmony_ci} 2418bf215546Sopenharmony_ci 2419bf215546Sopenharmony_ci/** 2420bf215546Sopenharmony_ci * Load the clear depth/stencil values from the image's metadata. 2421bf215546Sopenharmony_ci */ 2422bf215546Sopenharmony_cistatic void 2423bf215546Sopenharmony_ciradv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview) 2424bf215546Sopenharmony_ci{ 2425bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 2426bf215546Sopenharmony_ci const struct radv_image *image = iview->image; 2427bf215546Sopenharmony_ci VkImageAspectFlags aspects = vk_format_aspects(image->vk.format); 2428bf215546Sopenharmony_ci uint64_t va = radv_get_ds_clear_value_va(image, iview->vk.base_mip_level); 2429bf215546Sopenharmony_ci unsigned reg_offset = 0, reg_count = 0; 2430bf215546Sopenharmony_ci 2431bf215546Sopenharmony_ci assert(radv_image_has_htile(image)); 2432bf215546Sopenharmony_ci 2433bf215546Sopenharmony_ci if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) { 2434bf215546Sopenharmony_ci ++reg_count; 2435bf215546Sopenharmony_ci } else { 2436bf215546Sopenharmony_ci ++reg_offset; 2437bf215546Sopenharmony_ci va += 4; 2438bf215546Sopenharmony_ci } 2439bf215546Sopenharmony_ci if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) 2440bf215546Sopenharmony_ci ++reg_count; 2441bf215546Sopenharmony_ci 2442bf215546Sopenharmony_ci uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset; 2443bf215546Sopenharmony_ci 2444bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) { 2445bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0)); 2446bf215546Sopenharmony_ci radeon_emit(cs, va); 2447bf215546Sopenharmony_ci radeon_emit(cs, 
va >> 32); 2448bf215546Sopenharmony_ci radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); 2449bf215546Sopenharmony_ci radeon_emit(cs, reg_count); 2450bf215546Sopenharmony_ci } else { 2451bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 2452bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | 2453bf215546Sopenharmony_ci (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0)); 2454bf215546Sopenharmony_ci radeon_emit(cs, va); 2455bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 2456bf215546Sopenharmony_ci radeon_emit(cs, reg >> 2); 2457bf215546Sopenharmony_ci radeon_emit(cs, 0); 2458bf215546Sopenharmony_ci 2459bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 2460bf215546Sopenharmony_ci radeon_emit(cs, 0); 2461bf215546Sopenharmony_ci } 2462bf215546Sopenharmony_ci} 2463bf215546Sopenharmony_ci 2464bf215546Sopenharmony_ci/* 2465bf215546Sopenharmony_ci * With DCC some colors don't require CMASK elimination before being 2466bf215546Sopenharmony_ci * used as a texture. This sets a predicate value to determine if the 2467bf215546Sopenharmony_ci * cmask eliminate is required. 
2468bf215546Sopenharmony_ci */ 2469bf215546Sopenharmony_civoid 2470bf215546Sopenharmony_ciradv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2471bf215546Sopenharmony_ci const VkImageSubresourceRange *range, bool value) 2472bf215546Sopenharmony_ci{ 2473bf215546Sopenharmony_ci if (!image->fce_pred_offset) 2474bf215546Sopenharmony_ci return; 2475bf215546Sopenharmony_ci 2476bf215546Sopenharmony_ci uint64_t pred_val = value; 2477bf215546Sopenharmony_ci uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel); 2478bf215546Sopenharmony_ci uint32_t level_count = radv_get_levelCount(image, range); 2479bf215546Sopenharmony_ci uint32_t count = 2 * level_count; 2480bf215546Sopenharmony_ci 2481bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); 2482bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 2483bf215546Sopenharmony_ci S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); 2484bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va); 2485bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va >> 32); 2486bf215546Sopenharmony_ci 2487bf215546Sopenharmony_ci for (uint32_t l = 0; l < level_count; l++) { 2488bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, pred_val); 2489bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, pred_val >> 32); 2490bf215546Sopenharmony_ci } 2491bf215546Sopenharmony_ci} 2492bf215546Sopenharmony_ci 2493bf215546Sopenharmony_ci/** 2494bf215546Sopenharmony_ci * Update the DCC predicate to reflect the compression state. 
2495bf215546Sopenharmony_ci */ 2496bf215546Sopenharmony_civoid 2497bf215546Sopenharmony_ciradv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2498bf215546Sopenharmony_ci const VkImageSubresourceRange *range, bool value) 2499bf215546Sopenharmony_ci{ 2500bf215546Sopenharmony_ci if (image->dcc_pred_offset == 0) 2501bf215546Sopenharmony_ci return; 2502bf215546Sopenharmony_ci 2503bf215546Sopenharmony_ci uint64_t pred_val = value; 2504bf215546Sopenharmony_ci uint64_t va = radv_image_get_dcc_pred_va(image, range->baseMipLevel); 2505bf215546Sopenharmony_ci uint32_t level_count = radv_get_levelCount(image, range); 2506bf215546Sopenharmony_ci uint32_t count = 2 * level_count; 2507bf215546Sopenharmony_ci 2508bf215546Sopenharmony_ci assert(radv_dcc_enabled(image, range->baseMipLevel)); 2509bf215546Sopenharmony_ci 2510bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0)); 2511bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 2512bf215546Sopenharmony_ci S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); 2513bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va); 2514bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va >> 32); 2515bf215546Sopenharmony_ci 2516bf215546Sopenharmony_ci for (uint32_t l = 0; l < level_count; l++) { 2517bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, pred_val); 2518bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, pred_val >> 32); 2519bf215546Sopenharmony_ci } 2520bf215546Sopenharmony_ci} 2521bf215546Sopenharmony_ci 2522bf215546Sopenharmony_ci/** 2523bf215546Sopenharmony_ci * Update the fast clear color values if the image is bound as a color buffer. 
2524bf215546Sopenharmony_ci */ 2525bf215546Sopenharmony_cistatic void 2526bf215546Sopenharmony_ciradv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2527bf215546Sopenharmony_ci int cb_idx, uint32_t color_values[2]) 2528bf215546Sopenharmony_ci{ 2529bf215546Sopenharmony_ci const struct radv_subpass *subpass = cmd_buffer->state.subpass; 2530bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 2531bf215546Sopenharmony_ci uint32_t att_idx; 2532bf215546Sopenharmony_ci 2533bf215546Sopenharmony_ci if (!cmd_buffer->state.attachments || !subpass) 2534bf215546Sopenharmony_ci return; 2535bf215546Sopenharmony_ci 2536bf215546Sopenharmony_ci att_idx = subpass->color_attachments[cb_idx].attachment; 2537bf215546Sopenharmony_ci if (att_idx == VK_ATTACHMENT_UNUSED) 2538bf215546Sopenharmony_ci return; 2539bf215546Sopenharmony_ci 2540bf215546Sopenharmony_ci if (cmd_buffer->state.attachments[att_idx].iview->image != image) 2541bf215546Sopenharmony_ci return; 2542bf215546Sopenharmony_ci 2543bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2); 2544bf215546Sopenharmony_ci radeon_emit(cs, color_values[0]); 2545bf215546Sopenharmony_ci radeon_emit(cs, color_values[1]); 2546bf215546Sopenharmony_ci 2547bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 2548bf215546Sopenharmony_ci} 2549bf215546Sopenharmony_ci 2550bf215546Sopenharmony_ci/** 2551bf215546Sopenharmony_ci * Set the clear color values to the image's metadata. 
2552bf215546Sopenharmony_ci */ 2553bf215546Sopenharmony_cistatic void 2554bf215546Sopenharmony_ciradv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 2555bf215546Sopenharmony_ci const VkImageSubresourceRange *range, uint32_t color_values[2]) 2556bf215546Sopenharmony_ci{ 2557bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 2558bf215546Sopenharmony_ci uint32_t level_count = radv_get_levelCount(image, range); 2559bf215546Sopenharmony_ci uint32_t count = 2 * level_count; 2560bf215546Sopenharmony_ci 2561bf215546Sopenharmony_ci assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel)); 2562bf215546Sopenharmony_ci 2563bf215546Sopenharmony_ci if (radv_image_has_clear_value(image)) { 2564bf215546Sopenharmony_ci uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel); 2565bf215546Sopenharmony_ci 2566bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating)); 2567bf215546Sopenharmony_ci radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); 2568bf215546Sopenharmony_ci radeon_emit(cs, va); 2569bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 2570bf215546Sopenharmony_ci 2571bf215546Sopenharmony_ci for (uint32_t l = 0; l < level_count; l++) { 2572bf215546Sopenharmony_ci radeon_emit(cs, color_values[0]); 2573bf215546Sopenharmony_ci radeon_emit(cs, color_values[1]); 2574bf215546Sopenharmony_ci } 2575bf215546Sopenharmony_ci } else { 2576bf215546Sopenharmony_ci /* Some default value we can set in the update. */ 2577bf215546Sopenharmony_ci assert(color_values[0] == 0 && color_values[1] == 0); 2578bf215546Sopenharmony_ci } 2579bf215546Sopenharmony_ci} 2580bf215546Sopenharmony_ci 2581bf215546Sopenharmony_ci/** 2582bf215546Sopenharmony_ci * Update the clear color values for this image. 
2583bf215546Sopenharmony_ci */ 2584bf215546Sopenharmony_civoid 2585bf215546Sopenharmony_ciradv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, 2586bf215546Sopenharmony_ci const struct radv_image_view *iview, int cb_idx, 2587bf215546Sopenharmony_ci uint32_t color_values[2]) 2588bf215546Sopenharmony_ci{ 2589bf215546Sopenharmony_ci struct radv_image *image = iview->image; 2590bf215546Sopenharmony_ci VkImageSubresourceRange range = { 2591bf215546Sopenharmony_ci .aspectMask = iview->vk.aspects, 2592bf215546Sopenharmony_ci .baseMipLevel = iview->vk.base_mip_level, 2593bf215546Sopenharmony_ci .levelCount = iview->vk.level_count, 2594bf215546Sopenharmony_ci .baseArrayLayer = iview->vk.base_array_layer, 2595bf215546Sopenharmony_ci .layerCount = iview->vk.layer_count, 2596bf215546Sopenharmony_ci }; 2597bf215546Sopenharmony_ci 2598bf215546Sopenharmony_ci assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, iview->vk.base_mip_level)); 2599bf215546Sopenharmony_ci 2600bf215546Sopenharmony_ci /* Do not need to update the clear value for images that are fast cleared with the comp-to-single 2601bf215546Sopenharmony_ci * mode because the hardware gets the value from the image directly. 2602bf215546Sopenharmony_ci */ 2603bf215546Sopenharmony_ci if (iview->image->support_comp_to_single) 2604bf215546Sopenharmony_ci return; 2605bf215546Sopenharmony_ci 2606bf215546Sopenharmony_ci radv_set_color_clear_metadata(cmd_buffer, image, &range, color_values); 2607bf215546Sopenharmony_ci 2608bf215546Sopenharmony_ci radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values); 2609bf215546Sopenharmony_ci} 2610bf215546Sopenharmony_ci 2611bf215546Sopenharmony_ci/** 2612bf215546Sopenharmony_ci * Load the clear color values from the image's metadata. 
2613bf215546Sopenharmony_ci */ 2614bf215546Sopenharmony_cistatic void 2615bf215546Sopenharmony_ciradv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview, 2616bf215546Sopenharmony_ci int cb_idx) 2617bf215546Sopenharmony_ci{ 2618bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 2619bf215546Sopenharmony_ci struct radv_image *image = iview->image; 2620bf215546Sopenharmony_ci 2621bf215546Sopenharmony_ci if (!radv_image_has_cmask(image) && !radv_dcc_enabled(image, iview->vk.base_mip_level)) 2622bf215546Sopenharmony_ci return; 2623bf215546Sopenharmony_ci 2624bf215546Sopenharmony_ci if (iview->image->support_comp_to_single) 2625bf215546Sopenharmony_ci return; 2626bf215546Sopenharmony_ci 2627bf215546Sopenharmony_ci if (!radv_image_has_clear_value(image)) { 2628bf215546Sopenharmony_ci uint32_t color_values[2] = {0, 0}; 2629bf215546Sopenharmony_ci radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values); 2630bf215546Sopenharmony_ci return; 2631bf215546Sopenharmony_ci } 2632bf215546Sopenharmony_ci 2633bf215546Sopenharmony_ci uint64_t va = radv_image_get_fast_clear_va(image, iview->vk.base_mip_level); 2634bf215546Sopenharmony_ci uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c; 2635bf215546Sopenharmony_ci 2636bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) { 2637bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating)); 2638bf215546Sopenharmony_ci radeon_emit(cs, va); 2639bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 2640bf215546Sopenharmony_ci radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2); 2641bf215546Sopenharmony_ci radeon_emit(cs, 2); 2642bf215546Sopenharmony_ci } else { 2643bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); 2644bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | 
COPY_DATA_DST_SEL(COPY_DATA_REG) | 2645bf215546Sopenharmony_ci COPY_DATA_COUNT_SEL); 2646bf215546Sopenharmony_ci radeon_emit(cs, va); 2647bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 2648bf215546Sopenharmony_ci radeon_emit(cs, reg >> 2); 2649bf215546Sopenharmony_ci radeon_emit(cs, 0); 2650bf215546Sopenharmony_ci 2651bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); 2652bf215546Sopenharmony_ci radeon_emit(cs, 0); 2653bf215546Sopenharmony_ci } 2654bf215546Sopenharmony_ci} 2655bf215546Sopenharmony_ci 2656bf215546Sopenharmony_ci/* GFX9+ metadata cache flushing workaround. metadata cache coherency is 2657bf215546Sopenharmony_ci * broken if the CB caches data of multiple mips of the same image at the 2658bf215546Sopenharmony_ci * same time. 2659bf215546Sopenharmony_ci * 2660bf215546Sopenharmony_ci * Insert some flushes to avoid this. 2661bf215546Sopenharmony_ci */ 2662bf215546Sopenharmony_cistatic void 2663bf215546Sopenharmony_ciradv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer) 2664bf215546Sopenharmony_ci{ 2665bf215546Sopenharmony_ci struct vk_framebuffer *framebuffer = cmd_buffer->state.framebuffer; 2666bf215546Sopenharmony_ci const struct radv_subpass *subpass = cmd_buffer->state.subpass; 2667bf215546Sopenharmony_ci bool color_mip_changed = false; 2668bf215546Sopenharmony_ci 2669bf215546Sopenharmony_ci /* Entire workaround is not applicable before GFX9 */ 2670bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9) 2671bf215546Sopenharmony_ci return; 2672bf215546Sopenharmony_ci 2673bf215546Sopenharmony_ci if (!framebuffer) 2674bf215546Sopenharmony_ci return; 2675bf215546Sopenharmony_ci 2676bf215546Sopenharmony_ci for (int i = 0; i < subpass->color_count; ++i) { 2677bf215546Sopenharmony_ci int idx = subpass->color_attachments[i].attachment; 2678bf215546Sopenharmony_ci if (idx == VK_ATTACHMENT_UNUSED) 2679bf215546Sopenharmony_ci continue; 2680bf215546Sopenharmony_ci 
2681bf215546Sopenharmony_ci struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; 2682bf215546Sopenharmony_ci 2683bf215546Sopenharmony_ci if ((radv_image_has_CB_metadata(iview->image) || 2684bf215546Sopenharmony_ci radv_dcc_enabled(iview->image, iview->vk.base_mip_level) || 2685bf215546Sopenharmony_ci radv_dcc_enabled(iview->image, cmd_buffer->state.cb_mip[i])) && 2686bf215546Sopenharmony_ci cmd_buffer->state.cb_mip[i] != iview->vk.base_mip_level) 2687bf215546Sopenharmony_ci color_mip_changed = true; 2688bf215546Sopenharmony_ci 2689bf215546Sopenharmony_ci cmd_buffer->state.cb_mip[i] = iview->vk.base_mip_level; 2690bf215546Sopenharmony_ci } 2691bf215546Sopenharmony_ci 2692bf215546Sopenharmony_ci if (color_mip_changed) { 2693bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= 2694bf215546Sopenharmony_ci RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 2695bf215546Sopenharmony_ci } 2696bf215546Sopenharmony_ci} 2697bf215546Sopenharmony_ci 2698bf215546Sopenharmony_ci/* This function does the flushes for mip changes if the levels are not zero for 2699bf215546Sopenharmony_ci * all render targets. This way we can assume at the start of the next cmd_buffer 2700bf215546Sopenharmony_ci * that rendering to mip 0 doesn't need any flushes. As that is the most common 2701bf215546Sopenharmony_ci * case that saves some flushes. 
*/ 2702bf215546Sopenharmony_cistatic void 2703bf215546Sopenharmony_ciradv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer) 2704bf215546Sopenharmony_ci{ 2705bf215546Sopenharmony_ci /* Entire workaround is not applicable before GFX9 */ 2706bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9) 2707bf215546Sopenharmony_ci return; 2708bf215546Sopenharmony_ci 2709bf215546Sopenharmony_ci bool need_color_mip_flush = false; 2710bf215546Sopenharmony_ci for (unsigned i = 0; i < 8; ++i) { 2711bf215546Sopenharmony_ci if (cmd_buffer->state.cb_mip[i]) { 2712bf215546Sopenharmony_ci need_color_mip_flush = true; 2713bf215546Sopenharmony_ci break; 2714bf215546Sopenharmony_ci } 2715bf215546Sopenharmony_ci } 2716bf215546Sopenharmony_ci 2717bf215546Sopenharmony_ci if (need_color_mip_flush) { 2718bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= 2719bf215546Sopenharmony_ci RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 2720bf215546Sopenharmony_ci } 2721bf215546Sopenharmony_ci 2722bf215546Sopenharmony_ci memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip)); 2723bf215546Sopenharmony_ci} 2724bf215546Sopenharmony_ci 2725bf215546Sopenharmony_cistatic struct radv_image * 2726bf215546Sopenharmony_ciradv_cmd_buffer_get_vrs_image(struct radv_cmd_buffer *cmd_buffer) 2727bf215546Sopenharmony_ci{ 2728bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 2729bf215546Sopenharmony_ci 2730bf215546Sopenharmony_ci if (!device->vrs.image) { 2731bf215546Sopenharmony_ci VkResult result; 2732bf215546Sopenharmony_ci 2733bf215546Sopenharmony_ci /* The global VRS state is initialized on-demand to avoid wasting VRAM. 
*/ 2734bf215546Sopenharmony_ci result = radv_device_init_vrs_state(device); 2735bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 2736bf215546Sopenharmony_ci cmd_buffer->record_result = result; 2737bf215546Sopenharmony_ci return NULL; 2738bf215546Sopenharmony_ci } 2739bf215546Sopenharmony_ci } 2740bf215546Sopenharmony_ci 2741bf215546Sopenharmony_ci return device->vrs.image; 2742bf215546Sopenharmony_ci} 2743bf215546Sopenharmony_ci 2744bf215546Sopenharmony_cistatic void 2745bf215546Sopenharmony_ciradv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer) 2746bf215546Sopenharmony_ci{ 2747bf215546Sopenharmony_ci int i; 2748bf215546Sopenharmony_ci struct vk_framebuffer *framebuffer = cmd_buffer->state.framebuffer; 2749bf215546Sopenharmony_ci const struct radv_subpass *subpass = cmd_buffer->state.subpass; 2750bf215546Sopenharmony_ci bool disable_constant_encode_ac01 = false; 2751bf215546Sopenharmony_ci unsigned color_invalid = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11 2752bf215546Sopenharmony_ci ? 
G_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) 2753bf215546Sopenharmony_ci : G_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID); 2754bf215546Sopenharmony_ci 2755bf215546Sopenharmony_ci for (i = 0; i < subpass->color_count; ++i) { 2756bf215546Sopenharmony_ci if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) { 2757bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, color_invalid); 2758bf215546Sopenharmony_ci continue; 2759bf215546Sopenharmony_ci } 2760bf215546Sopenharmony_ci 2761bf215546Sopenharmony_ci int idx = subpass->color_attachments[i].attachment; 2762bf215546Sopenharmony_ci struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; 2763bf215546Sopenharmony_ci VkImageLayout layout = subpass->color_attachments[i].layout; 2764bf215546Sopenharmony_ci bool in_render_loop = subpass->color_attachments[i].in_render_loop; 2765bf215546Sopenharmony_ci 2766bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->image->bindings[0].bo); 2767bf215546Sopenharmony_ci 2768bf215546Sopenharmony_ci assert(iview->vk.aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT | 2769bf215546Sopenharmony_ci VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT)); 2770bf215546Sopenharmony_ci 2771bf215546Sopenharmony_ci if (iview->image->disjoint && iview->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT) { 2772bf215546Sopenharmony_ci for (uint32_t plane_id = 0; plane_id < iview->image->plane_count; plane_id++) { 2773bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 2774bf215546Sopenharmony_ci iview->image->bindings[plane_id].bo); 2775bf215546Sopenharmony_ci } 2776bf215546Sopenharmony_ci } else { 2777bf215546Sopenharmony_ci uint32_t plane_id = iview->image->disjoint ? 
iview->plane_id : 0; 2778bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 2779bf215546Sopenharmony_ci iview->image->bindings[plane_id].bo); 2780bf215546Sopenharmony_ci } 2781bf215546Sopenharmony_ci 2782bf215546Sopenharmony_ci radv_emit_fb_color_state(cmd_buffer, i, &cmd_buffer->state.attachments[idx].cb, iview, layout, 2783bf215546Sopenharmony_ci in_render_loop); 2784bf215546Sopenharmony_ci 2785bf215546Sopenharmony_ci radv_load_color_clear_metadata(cmd_buffer, iview, i); 2786bf215546Sopenharmony_ci 2787bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 && 2788bf215546Sopenharmony_ci iview->image->dcc_sign_reinterpret) { 2789bf215546Sopenharmony_ci /* Disable constant encoding with the clear value of "1" with different DCC signedness 2790bf215546Sopenharmony_ci * because the hardware will fill "1" instead of the clear value. 2791bf215546Sopenharmony_ci */ 2792bf215546Sopenharmony_ci disable_constant_encode_ac01 = true; 2793bf215546Sopenharmony_ci } 2794bf215546Sopenharmony_ci } 2795bf215546Sopenharmony_ci for (; i < cmd_buffer->state.last_subpass_color_count; i++) { 2796bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, color_invalid); 2797bf215546Sopenharmony_ci } 2798bf215546Sopenharmony_ci cmd_buffer->state.last_subpass_color_count = subpass->color_count; 2799bf215546Sopenharmony_ci 2800bf215546Sopenharmony_ci if (subpass->depth_stencil_attachment) { 2801bf215546Sopenharmony_ci int idx = subpass->depth_stencil_attachment->attachment; 2802bf215546Sopenharmony_ci VkImageLayout layout = subpass->depth_stencil_attachment->layout; 2803bf215546Sopenharmony_ci bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop; 2804bf215546Sopenharmony_ci struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview; 2805bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, 2806bf215546Sopenharmony_ci 
cmd_buffer->state.attachments[idx].iview->image->bindings[0].bo); 2807bf215546Sopenharmony_ci 2808bf215546Sopenharmony_ci radv_emit_fb_ds_state(cmd_buffer, &cmd_buffer->state.attachments[idx].ds, iview, layout, 2809bf215546Sopenharmony_ci in_render_loop); 2810bf215546Sopenharmony_ci 2811bf215546Sopenharmony_ci if (radv_layout_is_htile_compressed( 2812bf215546Sopenharmony_ci cmd_buffer->device, iview->image, layout, in_render_loop, 2813bf215546Sopenharmony_ci radv_image_queue_family_mask(iview->image, cmd_buffer->qf, 2814bf215546Sopenharmony_ci cmd_buffer->qf))) { 2815bf215546Sopenharmony_ci /* Only load the depth/stencil fast clear values when 2816bf215546Sopenharmony_ci * compressed rendering is enabled. 2817bf215546Sopenharmony_ci */ 2818bf215546Sopenharmony_ci radv_load_ds_clear_metadata(cmd_buffer, iview); 2819bf215546Sopenharmony_ci } 2820bf215546Sopenharmony_ci } else if (subpass->vrs_attachment && radv_cmd_buffer_get_vrs_image(cmd_buffer)) { 2821bf215546Sopenharmony_ci /* When a subpass uses a VRS attachment without binding a depth/stencil attachment, we have to 2822bf215546Sopenharmony_ci * bind our internal depth buffer that contains the VRS data as part of HTILE. 
2823bf215546Sopenharmony_ci */ 2824bf215546Sopenharmony_ci VkImageLayout layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; 2825bf215546Sopenharmony_ci struct radv_buffer *htile_buffer = cmd_buffer->device->vrs.buffer; 2826bf215546Sopenharmony_ci struct radv_image *image = cmd_buffer->device->vrs.image; 2827bf215546Sopenharmony_ci struct radv_ds_buffer_info ds; 2828bf215546Sopenharmony_ci struct radv_image_view iview; 2829bf215546Sopenharmony_ci 2830bf215546Sopenharmony_ci radv_image_view_init(&iview, cmd_buffer->device, 2831bf215546Sopenharmony_ci &(VkImageViewCreateInfo){ 2832bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, 2833bf215546Sopenharmony_ci .image = radv_image_to_handle(image), 2834bf215546Sopenharmony_ci .viewType = radv_meta_get_view_type(image), 2835bf215546Sopenharmony_ci .format = image->vk.format, 2836bf215546Sopenharmony_ci .subresourceRange = 2837bf215546Sopenharmony_ci { 2838bf215546Sopenharmony_ci .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT, 2839bf215546Sopenharmony_ci .baseMipLevel = 0, 2840bf215546Sopenharmony_ci .levelCount = 1, 2841bf215546Sopenharmony_ci .baseArrayLayer = 0, 2842bf215546Sopenharmony_ci .layerCount = 1, 2843bf215546Sopenharmony_ci }, 2844bf215546Sopenharmony_ci }, 2845bf215546Sopenharmony_ci 0, NULL); 2846bf215546Sopenharmony_ci 2847bf215546Sopenharmony_ci radv_initialise_vrs_surface(image, htile_buffer, &ds); 2848bf215546Sopenharmony_ci 2849bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, htile_buffer->bo); 2850bf215546Sopenharmony_ci 2851bf215546Sopenharmony_ci radv_emit_fb_ds_state(cmd_buffer, &ds, &iview, layout, false); 2852bf215546Sopenharmony_ci 2853bf215546Sopenharmony_ci radv_image_view_finish(&iview); 2854bf215546Sopenharmony_ci } else { 2855bf215546Sopenharmony_ci unsigned num_samples = 0; 2856bf215546Sopenharmony_ci 2857bf215546Sopenharmony_ci /* On GFX11, DB_Z_INFO.NUM_SAMPLES should always match the framebuffer samples. 
It affects 2858bf215546Sopenharmony_ci * VRS and occlusion queries if depth and stencil are not bound. 2859bf215546Sopenharmony_ci */ 2860bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX11) 2861bf215546Sopenharmony_ci num_samples = util_logbase2(subpass->max_sample_count); 2862bf215546Sopenharmony_ci 2863bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) 2864bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2); 2865bf215546Sopenharmony_ci else 2866bf215546Sopenharmony_ci radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2); 2867bf215546Sopenharmony_ci 2868bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID) | /* DB_Z_INFO */ 2869bf215546Sopenharmony_ci S_028040_NUM_SAMPLES(num_samples)); 2870bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ 2871bf215546Sopenharmony_ci } 2872bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR, 2873bf215546Sopenharmony_ci S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height)); 2874bf215546Sopenharmony_ci 2875bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8) { 2876bf215546Sopenharmony_ci bool disable_constant_encode = 2877bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode; 2878bf215546Sopenharmony_ci enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level; 2879bf215546Sopenharmony_ci uint8_t watermark = gfx_level >= GFX10 ? 
6 : 4; 2880bf215546Sopenharmony_ci 2881bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { 2882bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_FDCC_CONTROL, 2883bf215546Sopenharmony_ci S_028424_SAMPLE_MASK_TRACKER_WATERMARK(watermark)); 2884bf215546Sopenharmony_ci } else { 2885bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL, 2886bf215546Sopenharmony_ci S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(gfx_level <= GFX9) | 2887bf215546Sopenharmony_ci S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) | 2888bf215546Sopenharmony_ci S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) | 2889bf215546Sopenharmony_ci S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode)); 2890bf215546Sopenharmony_ci } 2891bf215546Sopenharmony_ci } 2892bf215546Sopenharmony_ci 2893bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER; 2894bf215546Sopenharmony_ci} 2895bf215546Sopenharmony_ci 2896bf215546Sopenharmony_cistatic void 2897bf215546Sopenharmony_ciradv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer, bool indirect) 2898bf215546Sopenharmony_ci{ 2899bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 2900bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 2901bf215546Sopenharmony_ci 2902bf215546Sopenharmony_ci /* With indirect generated commands the index buffer bind may be part of the 2903bf215546Sopenharmony_ci * indirect command buffer, in which case the app may not have bound any yet. */ 2904bf215546Sopenharmony_ci if (state->index_type < 0) 2905bf215546Sopenharmony_ci return; 2906bf215546Sopenharmony_ci 2907bf215546Sopenharmony_ci /* For the direct indexed draws we use DRAW_INDEX_2, which includes 2908bf215546Sopenharmony_ci * the index_va and max_index_count already. 
*/ 2909bf215546Sopenharmony_ci if (!indirect) 2910bf215546Sopenharmony_ci return; 2911bf215546Sopenharmony_ci 2912bf215546Sopenharmony_ci if (state->max_index_count || 2913bf215546Sopenharmony_ci !cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) { 2914bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0)); 2915bf215546Sopenharmony_ci radeon_emit(cs, state->index_va); 2916bf215546Sopenharmony_ci radeon_emit(cs, state->index_va >> 32); 2917bf215546Sopenharmony_ci 2918bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0)); 2919bf215546Sopenharmony_ci radeon_emit(cs, state->max_index_count); 2920bf215546Sopenharmony_ci } 2921bf215546Sopenharmony_ci 2922bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER; 2923bf215546Sopenharmony_ci} 2924bf215546Sopenharmony_ci 2925bf215546Sopenharmony_civoid 2926bf215546Sopenharmony_ciradv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlusion_queries) 2927bf215546Sopenharmony_ci{ 2928bf215546Sopenharmony_ci bool has_perfect_queries = cmd_buffer->state.perfect_occlusion_queries_enabled; 2929bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 2930bf215546Sopenharmony_ci uint32_t pa_sc_mode_cntl_1 = pipeline ? 
pipeline->ms.pa_sc_mode_cntl_1 : 0; 2931bf215546Sopenharmony_ci uint32_t db_count_control; 2932bf215546Sopenharmony_ci 2933bf215546Sopenharmony_ci if (!enable_occlusion_queries) { 2934bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { 2935bf215546Sopenharmony_ci if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && 2936bf215546Sopenharmony_ci pipeline->disable_out_of_order_rast_for_occlusion && has_perfect_queries) { 2937bf215546Sopenharmony_ci /* Re-enable out-of-order rasterization if the 2938bf215546Sopenharmony_ci * bound pipeline supports it and if it's has 2939bf215546Sopenharmony_ci * been disabled before starting any perfect 2940bf215546Sopenharmony_ci * occlusion queries. 2941bf215546Sopenharmony_ci */ 2942bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1); 2943bf215546Sopenharmony_ci } 2944bf215546Sopenharmony_ci } 2945bf215546Sopenharmony_ci db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); 2946bf215546Sopenharmony_ci } else { 2947bf215546Sopenharmony_ci const struct radv_subpass *subpass = cmd_buffer->state.subpass; 2948bf215546Sopenharmony_ci uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0; 2949bf215546Sopenharmony_ci bool gfx10_perfect = 2950bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && has_perfect_queries; 2951bf215546Sopenharmony_ci 2952bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { 2953bf215546Sopenharmony_ci /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially 2954bf215546Sopenharmony_ci * covered tiles, discards, and early depth testing. 
For more details, 2955bf215546Sopenharmony_ci * see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */ 2956bf215546Sopenharmony_ci db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | 2957bf215546Sopenharmony_ci S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) | 2958bf215546Sopenharmony_ci S_028004_SAMPLE_RATE(sample_rate) | S_028004_ZPASS_ENABLE(1) | 2959bf215546Sopenharmony_ci S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1); 2960bf215546Sopenharmony_ci 2961bf215546Sopenharmony_ci if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) && 2962bf215546Sopenharmony_ci pipeline->disable_out_of_order_rast_for_occlusion && has_perfect_queries) { 2963bf215546Sopenharmony_ci /* If the bound pipeline has enabled 2964bf215546Sopenharmony_ci * out-of-order rasterization, we should 2965bf215546Sopenharmony_ci * disable it before starting any perfect 2966bf215546Sopenharmony_ci * occlusion queries. 2967bf215546Sopenharmony_ci */ 2968bf215546Sopenharmony_ci pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE; 2969bf215546Sopenharmony_ci 2970bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1); 2971bf215546Sopenharmony_ci } 2972bf215546Sopenharmony_ci } else { 2973bf215546Sopenharmony_ci db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | S_028004_SAMPLE_RATE(sample_rate); 2974bf215546Sopenharmony_ci } 2975bf215546Sopenharmony_ci } 2976bf215546Sopenharmony_ci 2977bf215546Sopenharmony_ci radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control); 2978bf215546Sopenharmony_ci 2979bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 2980bf215546Sopenharmony_ci} 2981bf215546Sopenharmony_ci 2982bf215546Sopenharmony_ciunsigned 2983bf215546Sopenharmony_ciradv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs) 2984bf215546Sopenharmony_ci{ 2985bf215546Sopenharmony_ci /* instance_rate_vs_prologs is 
a flattened array of array of arrays of different sizes, or a 2986bf215546Sopenharmony_ci * single array sorted in ascending order using: 2987bf215546Sopenharmony_ci * - total number of attributes 2988bf215546Sopenharmony_ci * - number of instanced attributes 2989bf215546Sopenharmony_ci * - index of first instanced attribute 2990bf215546Sopenharmony_ci */ 2991bf215546Sopenharmony_ci 2992bf215546Sopenharmony_ci /* From total number of attributes to offset. */ 2993bf215546Sopenharmony_ci static const uint16_t total_to_offset[16] = {0, 1, 4, 10, 20, 35, 56, 84, 2994bf215546Sopenharmony_ci 120, 165, 220, 286, 364, 455, 560, 680}; 2995bf215546Sopenharmony_ci unsigned start_index = total_to_offset[num_attributes - 1]; 2996bf215546Sopenharmony_ci 2997bf215546Sopenharmony_ci /* From number of instanced attributes to offset. This would require a different LUT depending on 2998bf215546Sopenharmony_ci * the total number of attributes, but we can exploit a pattern to use just the LUT for 16 total 2999bf215546Sopenharmony_ci * attributes. 
3000bf215546Sopenharmony_ci */ 3001bf215546Sopenharmony_ci static const uint8_t count_to_offset_total16[16] = {0, 16, 31, 45, 58, 70, 81, 91, 3002bf215546Sopenharmony_ci 100, 108, 115, 121, 126, 130, 133, 135}; 3003bf215546Sopenharmony_ci unsigned count = util_bitcount(instance_rate_inputs); 3004bf215546Sopenharmony_ci unsigned offset_from_start_index = 3005bf215546Sopenharmony_ci count_to_offset_total16[count - 1] - ((16 - num_attributes) * (count - 1)); 3006bf215546Sopenharmony_ci 3007bf215546Sopenharmony_ci unsigned first = ffs(instance_rate_inputs) - 1; 3008bf215546Sopenharmony_ci return start_index + offset_from_start_index + first; 3009bf215546Sopenharmony_ci} 3010bf215546Sopenharmony_ci 3011bf215546Sopenharmony_ciunion vs_prolog_key_header { 3012bf215546Sopenharmony_ci struct { 3013bf215546Sopenharmony_ci uint32_t key_size : 8; 3014bf215546Sopenharmony_ci uint32_t num_attributes : 6; 3015bf215546Sopenharmony_ci uint32_t as_ls : 1; 3016bf215546Sopenharmony_ci uint32_t is_ngg : 1; 3017bf215546Sopenharmony_ci uint32_t wave32 : 1; 3018bf215546Sopenharmony_ci uint32_t next_stage : 3; 3019bf215546Sopenharmony_ci uint32_t instance_rate_inputs : 1; 3020bf215546Sopenharmony_ci uint32_t alpha_adjust_lo : 1; 3021bf215546Sopenharmony_ci uint32_t alpha_adjust_hi : 1; 3022bf215546Sopenharmony_ci uint32_t misaligned_mask : 1; 3023bf215546Sopenharmony_ci uint32_t post_shuffle : 1; 3024bf215546Sopenharmony_ci uint32_t nontrivial_divisors : 1; 3025bf215546Sopenharmony_ci uint32_t zero_divisors : 1; 3026bf215546Sopenharmony_ci /* We need this to ensure the padding is zero. It's useful even if it's unused. 
*/ 3027bf215546Sopenharmony_ci uint32_t padding0 : 5; 3028bf215546Sopenharmony_ci }; 3029bf215546Sopenharmony_ci uint32_t v; 3030bf215546Sopenharmony_ci}; 3031bf215546Sopenharmony_ci 3032bf215546Sopenharmony_ciuint32_t 3033bf215546Sopenharmony_ciradv_hash_vs_prolog(const void *key_) 3034bf215546Sopenharmony_ci{ 3035bf215546Sopenharmony_ci const uint32_t *key = key_; 3036bf215546Sopenharmony_ci union vs_prolog_key_header header; 3037bf215546Sopenharmony_ci header.v = key[0]; 3038bf215546Sopenharmony_ci return _mesa_hash_data(key, header.key_size); 3039bf215546Sopenharmony_ci} 3040bf215546Sopenharmony_ci 3041bf215546Sopenharmony_cibool 3042bf215546Sopenharmony_ciradv_cmp_vs_prolog(const void *a_, const void *b_) 3043bf215546Sopenharmony_ci{ 3044bf215546Sopenharmony_ci const uint32_t *a = a_; 3045bf215546Sopenharmony_ci const uint32_t *b = b_; 3046bf215546Sopenharmony_ci if (a[0] != b[0]) 3047bf215546Sopenharmony_ci return false; 3048bf215546Sopenharmony_ci 3049bf215546Sopenharmony_ci union vs_prolog_key_header header; 3050bf215546Sopenharmony_ci header.v = a[0]; 3051bf215546Sopenharmony_ci return memcmp(a, b, header.key_size) == 0; 3052bf215546Sopenharmony_ci} 3053bf215546Sopenharmony_ci 3054bf215546Sopenharmony_cistatic struct radv_shader_part * 3055bf215546Sopenharmony_cilookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shader, 3056bf215546Sopenharmony_ci uint32_t *nontrivial_divisors) 3057bf215546Sopenharmony_ci{ 3058bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(union vs_prolog_key_header) == 4); 3059bf215546Sopenharmony_ci assert(vs_shader->info.vs.dynamic_inputs); 3060bf215546Sopenharmony_ci 3061bf215546Sopenharmony_ci const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input; 3062bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 3063bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 3064bf215546Sopenharmony_ci 3065bf215546Sopenharmony_ci 
unsigned num_attributes = pipeline->last_vertex_attrib_bit; 3066bf215546Sopenharmony_ci uint32_t attribute_mask = BITFIELD_MASK(num_attributes); 3067bf215546Sopenharmony_ci 3068bf215546Sopenharmony_ci uint32_t instance_rate_inputs = state->instance_rate_inputs & attribute_mask; 3069bf215546Sopenharmony_ci uint32_t zero_divisors = state->zero_divisors & attribute_mask; 3070bf215546Sopenharmony_ci *nontrivial_divisors = state->nontrivial_divisors & attribute_mask; 3071bf215546Sopenharmony_ci uint32_t misaligned_mask = cmd_buffer->state.vbo_misaligned_mask; 3072bf215546Sopenharmony_ci if (cmd_buffer->state.vbo_misaligned_mask_invalid) { 3073bf215546Sopenharmony_ci assert(device->physical_device->rad_info.gfx_level == GFX6 || 3074bf215546Sopenharmony_ci device->physical_device->rad_info.gfx_level >= GFX10); 3075bf215546Sopenharmony_ci 3076bf215546Sopenharmony_ci u_foreach_bit (index, cmd_buffer->state.vbo_misaligned_mask_invalid & attribute_mask) { 3077bf215546Sopenharmony_ci uint8_t binding = state->bindings[index]; 3078bf215546Sopenharmony_ci if (!(cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(binding))) 3079bf215546Sopenharmony_ci continue; 3080bf215546Sopenharmony_ci uint8_t req = state->format_align_req_minus_1[index]; 3081bf215546Sopenharmony_ci struct radv_vertex_binding *vb = &cmd_buffer->vertex_bindings[binding]; 3082bf215546Sopenharmony_ci VkDeviceSize offset = vb->offset + state->offsets[index]; 3083bf215546Sopenharmony_ci if ((offset & req) || (vb->stride & req)) 3084bf215546Sopenharmony_ci misaligned_mask |= BITFIELD_BIT(index); 3085bf215546Sopenharmony_ci } 3086bf215546Sopenharmony_ci cmd_buffer->state.vbo_misaligned_mask = misaligned_mask; 3087bf215546Sopenharmony_ci cmd_buffer->state.vbo_misaligned_mask_invalid &= ~attribute_mask; 3088bf215546Sopenharmony_ci } 3089bf215546Sopenharmony_ci 3090bf215546Sopenharmony_ci /* try to use a pre-compiled prolog first */ 3091bf215546Sopenharmony_ci struct radv_shader_part *prolog = NULL; 
3092bf215546Sopenharmony_ci if (pipeline->can_use_simple_input && 3093bf215546Sopenharmony_ci (!vs_shader->info.vs.as_ls || !instance_rate_inputs) && 3094bf215546Sopenharmony_ci !misaligned_mask && !state->alpha_adjust_lo && !state->alpha_adjust_hi) { 3095bf215546Sopenharmony_ci if (!instance_rate_inputs) { 3096bf215546Sopenharmony_ci prolog = device->simple_vs_prologs[num_attributes - 1]; 3097bf215546Sopenharmony_ci } else if (num_attributes <= 16 && !*nontrivial_divisors && !zero_divisors && 3098bf215546Sopenharmony_ci util_bitcount(instance_rate_inputs) == 3099bf215546Sopenharmony_ci (util_last_bit(instance_rate_inputs) - ffs(instance_rate_inputs) + 1)) { 3100bf215546Sopenharmony_ci unsigned index = radv_instance_rate_prolog_index(num_attributes, instance_rate_inputs); 3101bf215546Sopenharmony_ci prolog = device->instance_rate_vs_prologs[index]; 3102bf215546Sopenharmony_ci } 3103bf215546Sopenharmony_ci } 3104bf215546Sopenharmony_ci if (prolog) 3105bf215546Sopenharmony_ci return prolog; 3106bf215546Sopenharmony_ci 3107bf215546Sopenharmony_ci /* if we couldn't use a pre-compiled prolog, find one in the cache or create one */ 3108bf215546Sopenharmony_ci uint32_t key_words[17]; 3109bf215546Sopenharmony_ci unsigned key_size = 1; 3110bf215546Sopenharmony_ci 3111bf215546Sopenharmony_ci struct radv_vs_prolog_key key; 3112bf215546Sopenharmony_ci key.state = state; 3113bf215546Sopenharmony_ci key.num_attributes = num_attributes; 3114bf215546Sopenharmony_ci key.misaligned_mask = misaligned_mask; 3115bf215546Sopenharmony_ci /* The instance ID input VGPR is placed differently when as_ls=true. 
*/ 3116bf215546Sopenharmony_ci key.as_ls = vs_shader->info.vs.as_ls && instance_rate_inputs; 3117bf215546Sopenharmony_ci key.is_ngg = vs_shader->info.is_ngg; 3118bf215546Sopenharmony_ci key.wave32 = vs_shader->info.wave_size == 32; 3119bf215546Sopenharmony_ci key.next_stage = pipeline->next_vertex_stage; 3120bf215546Sopenharmony_ci 3121bf215546Sopenharmony_ci union vs_prolog_key_header header; 3122bf215546Sopenharmony_ci header.v = 0; 3123bf215546Sopenharmony_ci header.num_attributes = num_attributes; 3124bf215546Sopenharmony_ci header.as_ls = key.as_ls; 3125bf215546Sopenharmony_ci header.is_ngg = key.is_ngg; 3126bf215546Sopenharmony_ci header.wave32 = key.wave32; 3127bf215546Sopenharmony_ci header.next_stage = key.next_stage; 3128bf215546Sopenharmony_ci 3129bf215546Sopenharmony_ci if (instance_rate_inputs & ~*nontrivial_divisors) { 3130bf215546Sopenharmony_ci header.instance_rate_inputs = true; 3131bf215546Sopenharmony_ci key_words[key_size++] = instance_rate_inputs; 3132bf215546Sopenharmony_ci } 3133bf215546Sopenharmony_ci if (*nontrivial_divisors) { 3134bf215546Sopenharmony_ci header.nontrivial_divisors = true; 3135bf215546Sopenharmony_ci key_words[key_size++] = *nontrivial_divisors; 3136bf215546Sopenharmony_ci } 3137bf215546Sopenharmony_ci if (zero_divisors) { 3138bf215546Sopenharmony_ci header.zero_divisors = true; 3139bf215546Sopenharmony_ci key_words[key_size++] = zero_divisors; 3140bf215546Sopenharmony_ci } 3141bf215546Sopenharmony_ci if (misaligned_mask) { 3142bf215546Sopenharmony_ci header.misaligned_mask = true; 3143bf215546Sopenharmony_ci key_words[key_size++] = misaligned_mask; 3144bf215546Sopenharmony_ci 3145bf215546Sopenharmony_ci uint8_t *formats = (uint8_t *)&key_words[key_size]; 3146bf215546Sopenharmony_ci unsigned num_formats = 0; 3147bf215546Sopenharmony_ci u_foreach_bit(index, misaligned_mask) formats[num_formats++] = state->formats[index]; 3148bf215546Sopenharmony_ci while (num_formats & 0x3) 3149bf215546Sopenharmony_ci formats[num_formats++] 
= 0; 3150bf215546Sopenharmony_ci key_size += num_formats / 4u; 3151bf215546Sopenharmony_ci 3152bf215546Sopenharmony_ci if (state->post_shuffle & attribute_mask) { 3153bf215546Sopenharmony_ci header.post_shuffle = true; 3154bf215546Sopenharmony_ci key_words[key_size++] = state->post_shuffle & attribute_mask; 3155bf215546Sopenharmony_ci } 3156bf215546Sopenharmony_ci } 3157bf215546Sopenharmony_ci if (state->alpha_adjust_lo & attribute_mask) { 3158bf215546Sopenharmony_ci header.alpha_adjust_lo = true; 3159bf215546Sopenharmony_ci key_words[key_size++] = state->alpha_adjust_lo & attribute_mask; 3160bf215546Sopenharmony_ci } 3161bf215546Sopenharmony_ci if (state->alpha_adjust_hi & attribute_mask) { 3162bf215546Sopenharmony_ci header.alpha_adjust_hi = true; 3163bf215546Sopenharmony_ci key_words[key_size++] = state->alpha_adjust_hi & attribute_mask; 3164bf215546Sopenharmony_ci } 3165bf215546Sopenharmony_ci 3166bf215546Sopenharmony_ci header.key_size = key_size * sizeof(key_words[0]); 3167bf215546Sopenharmony_ci key_words[0] = header.v; 3168bf215546Sopenharmony_ci 3169bf215546Sopenharmony_ci uint32_t hash = radv_hash_vs_prolog(key_words); 3170bf215546Sopenharmony_ci 3171bf215546Sopenharmony_ci if (cmd_buffer->state.emitted_vs_prolog && 3172bf215546Sopenharmony_ci cmd_buffer->state.emitted_vs_prolog_key_hash == hash && 3173bf215546Sopenharmony_ci radv_cmp_vs_prolog(key_words, cmd_buffer->state.emitted_vs_prolog_key)) 3174bf215546Sopenharmony_ci return cmd_buffer->state.emitted_vs_prolog; 3175bf215546Sopenharmony_ci 3176bf215546Sopenharmony_ci u_rwlock_rdlock(&device->vs_prologs_lock); 3177bf215546Sopenharmony_ci struct hash_entry *prolog_entry = 3178bf215546Sopenharmony_ci _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, key_words); 3179bf215546Sopenharmony_ci u_rwlock_rdunlock(&device->vs_prologs_lock); 3180bf215546Sopenharmony_ci 3181bf215546Sopenharmony_ci if (!prolog_entry) { 3182bf215546Sopenharmony_ci u_rwlock_wrlock(&device->vs_prologs_lock); 
3183bf215546Sopenharmony_ci prolog_entry = _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, key_words); 3184bf215546Sopenharmony_ci if (prolog_entry) { 3185bf215546Sopenharmony_ci u_rwlock_wrunlock(&device->vs_prologs_lock); 3186bf215546Sopenharmony_ci return prolog_entry->data; 3187bf215546Sopenharmony_ci } 3188bf215546Sopenharmony_ci 3189bf215546Sopenharmony_ci prolog = radv_create_vs_prolog(device, &key); 3190bf215546Sopenharmony_ci uint32_t *key2 = malloc(key_size * 4); 3191bf215546Sopenharmony_ci if (!prolog || !key2) { 3192bf215546Sopenharmony_ci radv_shader_part_destroy(device, prolog); 3193bf215546Sopenharmony_ci free(key2); 3194bf215546Sopenharmony_ci u_rwlock_wrunlock(&device->vs_prologs_lock); 3195bf215546Sopenharmony_ci return NULL; 3196bf215546Sopenharmony_ci } 3197bf215546Sopenharmony_ci memcpy(key2, key_words, key_size * 4); 3198bf215546Sopenharmony_ci _mesa_hash_table_insert_pre_hashed(device->vs_prologs, hash, key2, prolog); 3199bf215546Sopenharmony_ci 3200bf215546Sopenharmony_ci u_rwlock_wrunlock(&device->vs_prologs_lock); 3201bf215546Sopenharmony_ci return prolog; 3202bf215546Sopenharmony_ci } 3203bf215546Sopenharmony_ci 3204bf215546Sopenharmony_ci return prolog_entry->data; 3205bf215546Sopenharmony_ci} 3206bf215546Sopenharmony_ci 3207bf215546Sopenharmony_cistatic void 3208bf215546Sopenharmony_ciemit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shader, 3209bf215546Sopenharmony_ci struct radv_shader_part *prolog, bool pipeline_is_dirty) 3210bf215546Sopenharmony_ci{ 3211bf215546Sopenharmony_ci /* no need to re-emit anything in this case */ 3212bf215546Sopenharmony_ci if (cmd_buffer->state.emitted_vs_prolog == prolog && !pipeline_is_dirty) 3213bf215546Sopenharmony_ci return; 3214bf215546Sopenharmony_ci 3215bf215546Sopenharmony_ci enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level; 3216bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = 
cmd_buffer->state.graphics_pipeline; 3217bf215546Sopenharmony_ci uint64_t prolog_va = radv_buffer_get_va(prolog->bo) + prolog->alloc->offset; 3218bf215546Sopenharmony_ci 3219bf215546Sopenharmony_ci assert(cmd_buffer->state.emitted_graphics_pipeline == cmd_buffer->state.graphics_pipeline); 3220bf215546Sopenharmony_ci 3221bf215546Sopenharmony_ci uint32_t rsrc1 = vs_shader->config.rsrc1; 3222bf215546Sopenharmony_ci if (chip < GFX10 && G_00B228_SGPRS(prolog->rsrc1) > G_00B228_SGPRS(vs_shader->config.rsrc1)) 3223bf215546Sopenharmony_ci rsrc1 = (rsrc1 & C_00B228_SGPRS) | (prolog->rsrc1 & ~C_00B228_SGPRS); 3224bf215546Sopenharmony_ci 3225bf215546Sopenharmony_ci /* The main shader must not use less VGPRs than the prolog, otherwise shared vgprs might not 3226bf215546Sopenharmony_ci * work. 3227bf215546Sopenharmony_ci */ 3228bf215546Sopenharmony_ci assert(G_00B848_VGPRS(vs_shader->config.rsrc1) >= G_00B848_VGPRS(prolog->rsrc1)); 3229bf215546Sopenharmony_ci 3230bf215546Sopenharmony_ci unsigned pgm_lo_reg = R_00B120_SPI_SHADER_PGM_LO_VS; 3231bf215546Sopenharmony_ci unsigned rsrc1_reg = R_00B128_SPI_SHADER_PGM_RSRC1_VS; 3232bf215546Sopenharmony_ci if (vs_shader->info.is_ngg || pipeline->base.shaders[MESA_SHADER_GEOMETRY] == vs_shader) { 3233bf215546Sopenharmony_ci pgm_lo_reg = chip >= GFX10 ? R_00B320_SPI_SHADER_PGM_LO_ES : R_00B210_SPI_SHADER_PGM_LO_ES; 3234bf215546Sopenharmony_ci rsrc1_reg = R_00B228_SPI_SHADER_PGM_RSRC1_GS; 3235bf215546Sopenharmony_ci } else if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL] == vs_shader) { 3236bf215546Sopenharmony_ci pgm_lo_reg = chip >= GFX10 ? 
R_00B520_SPI_SHADER_PGM_LO_LS : R_00B410_SPI_SHADER_PGM_LO_LS; 3237bf215546Sopenharmony_ci rsrc1_reg = R_00B428_SPI_SHADER_PGM_RSRC1_HS; 3238bf215546Sopenharmony_ci } else if (vs_shader->info.vs.as_ls) { 3239bf215546Sopenharmony_ci pgm_lo_reg = R_00B520_SPI_SHADER_PGM_LO_LS; 3240bf215546Sopenharmony_ci rsrc1_reg = R_00B528_SPI_SHADER_PGM_RSRC1_LS; 3241bf215546Sopenharmony_ci } else if (vs_shader->info.vs.as_es) { 3242bf215546Sopenharmony_ci pgm_lo_reg = R_00B320_SPI_SHADER_PGM_LO_ES; 3243bf215546Sopenharmony_ci rsrc1_reg = R_00B328_SPI_SHADER_PGM_RSRC1_ES; 3244bf215546Sopenharmony_ci } 3245bf215546Sopenharmony_ci 3246bf215546Sopenharmony_ci radeon_set_sh_reg(cmd_buffer->cs, pgm_lo_reg, prolog_va >> 8); 3247bf215546Sopenharmony_ci 3248bf215546Sopenharmony_ci if (chip < GFX10) 3249bf215546Sopenharmony_ci radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg, rsrc1); 3250bf215546Sopenharmony_ci else 3251bf215546Sopenharmony_ci assert(rsrc1 == vs_shader->config.rsrc1); 3252bf215546Sopenharmony_ci 3253bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, prolog->bo); 3254bf215546Sopenharmony_ci} 3255bf215546Sopenharmony_ci 3256bf215546Sopenharmony_cistatic void 3257bf215546Sopenharmony_ciemit_prolog_inputs(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shader, 3258bf215546Sopenharmony_ci uint32_t nontrivial_divisors, bool pipeline_is_dirty) 3259bf215546Sopenharmony_ci{ 3260bf215546Sopenharmony_ci /* no need to re-emit anything in this case */ 3261bf215546Sopenharmony_ci if (!nontrivial_divisors && !pipeline_is_dirty && cmd_buffer->state.emitted_vs_prolog && 3262bf215546Sopenharmony_ci !cmd_buffer->state.emitted_vs_prolog->nontrivial_divisors) 3263bf215546Sopenharmony_ci return; 3264bf215546Sopenharmony_ci 3265bf215546Sopenharmony_ci const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input; 3266bf215546Sopenharmony_ci uint64_t input_va = radv_shader_get_va(vs_shader); 3267bf215546Sopenharmony_ci 
3268bf215546Sopenharmony_ci if (nontrivial_divisors) { 3269bf215546Sopenharmony_ci unsigned inputs_offset; 3270bf215546Sopenharmony_ci uint32_t *inputs; 3271bf215546Sopenharmony_ci unsigned size = 8 + util_bitcount(nontrivial_divisors) * 8; 3272bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &inputs_offset, (void **)&inputs)) 3273bf215546Sopenharmony_ci return; 3274bf215546Sopenharmony_ci 3275bf215546Sopenharmony_ci *(inputs++) = input_va; 3276bf215546Sopenharmony_ci *(inputs++) = input_va >> 32; 3277bf215546Sopenharmony_ci 3278bf215546Sopenharmony_ci u_foreach_bit(index, nontrivial_divisors) 3279bf215546Sopenharmony_ci { 3280bf215546Sopenharmony_ci uint32_t div = state->divisors[index]; 3281bf215546Sopenharmony_ci if (div == 0) { 3282bf215546Sopenharmony_ci *(inputs++) = 0; 3283bf215546Sopenharmony_ci *(inputs++) = 1; 3284bf215546Sopenharmony_ci } else if (util_is_power_of_two_or_zero(div)) { 3285bf215546Sopenharmony_ci *(inputs++) = util_logbase2(div) | (1 << 8); 3286bf215546Sopenharmony_ci *(inputs++) = 0xffffffffu; 3287bf215546Sopenharmony_ci } else { 3288bf215546Sopenharmony_ci struct util_fast_udiv_info info = util_compute_fast_udiv_info(div, 32, 32); 3289bf215546Sopenharmony_ci *(inputs++) = info.pre_shift | (info.increment << 8) | (info.post_shift << 16); 3290bf215546Sopenharmony_ci *(inputs++) = info.multiplier; 3291bf215546Sopenharmony_ci } 3292bf215546Sopenharmony_ci } 3293bf215546Sopenharmony_ci 3294bf215546Sopenharmony_ci input_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + inputs_offset; 3295bf215546Sopenharmony_ci } 3296bf215546Sopenharmony_ci 3297bf215546Sopenharmony_ci struct radv_userdata_info *loc = 3298bf215546Sopenharmony_ci &vs_shader->info.user_sgprs_locs.shader_data[AC_UD_VS_PROLOG_INPUTS]; 3299bf215546Sopenharmony_ci uint32_t base_reg = cmd_buffer->state.graphics_pipeline->base.user_data_0[MESA_SHADER_VERTEX]; 3300bf215546Sopenharmony_ci assert(loc->sgpr_idx != -1); 3301bf215546Sopenharmony_ci 
assert(loc->num_sgprs == 2); 3302bf215546Sopenharmony_ci radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3303bf215546Sopenharmony_ci input_va, true); 3304bf215546Sopenharmony_ci} 3305bf215546Sopenharmony_ci 3306bf215546Sopenharmony_cistatic void 3307bf215546Sopenharmony_ciradv_emit_vertex_input(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty) 3308bf215546Sopenharmony_ci{ 3309bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 3310bf215546Sopenharmony_ci struct radv_shader *vs_shader = radv_get_shader(&pipeline->base, MESA_SHADER_VERTEX); 3311bf215546Sopenharmony_ci 3312bf215546Sopenharmony_ci assert(!cmd_buffer->state.mesh_shading); 3313bf215546Sopenharmony_ci 3314bf215546Sopenharmony_ci if (!vs_shader->info.vs.has_prolog) 3315bf215546Sopenharmony_ci return; 3316bf215546Sopenharmony_ci 3317bf215546Sopenharmony_ci uint32_t nontrivial_divisors; 3318bf215546Sopenharmony_ci struct radv_shader_part *prolog = 3319bf215546Sopenharmony_ci lookup_vs_prolog(cmd_buffer, vs_shader, &nontrivial_divisors); 3320bf215546Sopenharmony_ci if (!prolog) { 3321bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 3322bf215546Sopenharmony_ci return; 3323bf215546Sopenharmony_ci } 3324bf215546Sopenharmony_ci emit_prolog_regs(cmd_buffer, vs_shader, prolog, pipeline_is_dirty); 3325bf215546Sopenharmony_ci emit_prolog_inputs(cmd_buffer, vs_shader, nontrivial_divisors, pipeline_is_dirty); 3326bf215546Sopenharmony_ci 3327bf215546Sopenharmony_ci cmd_buffer->state.emitted_vs_prolog = prolog; 3328bf215546Sopenharmony_ci 3329bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->trace_bo)) 3330bf215546Sopenharmony_ci radv_save_vs_prolog(cmd_buffer, prolog); 3331bf215546Sopenharmony_ci} 3332bf215546Sopenharmony_ci 3333bf215546Sopenharmony_cistatic void 3334bf215546Sopenharmony_ciradv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, bool 
pipeline_is_dirty) 3335bf215546Sopenharmony_ci{ 3336bf215546Sopenharmony_ci uint64_t states = 3337bf215546Sopenharmony_ci cmd_buffer->state.dirty & cmd_buffer->state.emitted_graphics_pipeline->needed_dynamic_state; 3338bf215546Sopenharmony_ci 3339bf215546Sopenharmony_ci if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT)) 3340bf215546Sopenharmony_ci radv_emit_viewport(cmd_buffer); 3341bf215546Sopenharmony_ci 3342bf215546Sopenharmony_ci if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) && 3343bf215546Sopenharmony_ci !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug) 3344bf215546Sopenharmony_ci radv_emit_scissor(cmd_buffer); 3345bf215546Sopenharmony_ci 3346bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) 3347bf215546Sopenharmony_ci radv_emit_line_width(cmd_buffer); 3348bf215546Sopenharmony_ci 3349bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) 3350bf215546Sopenharmony_ci radv_emit_blend_constants(cmd_buffer); 3351bf215546Sopenharmony_ci 3352bf215546Sopenharmony_ci if (states & 3353bf215546Sopenharmony_ci (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK | 3354bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK)) 3355bf215546Sopenharmony_ci radv_emit_stencil(cmd_buffer); 3356bf215546Sopenharmony_ci 3357bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) 3358bf215546Sopenharmony_ci radv_emit_depth_bounds(cmd_buffer); 3359bf215546Sopenharmony_ci 3360bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) 3361bf215546Sopenharmony_ci radv_emit_depth_bias(cmd_buffer); 3362bf215546Sopenharmony_ci 3363bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE) 3364bf215546Sopenharmony_ci radv_emit_discard_rectangle(cmd_buffer); 3365bf215546Sopenharmony_ci 3366bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) 3367bf215546Sopenharmony_ci 
radv_emit_sample_locations(cmd_buffer); 3368bf215546Sopenharmony_ci 3369bf215546Sopenharmony_ci if (states & (RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE)) 3370bf215546Sopenharmony_ci radv_emit_line_stipple(cmd_buffer); 3371bf215546Sopenharmony_ci 3372bf215546Sopenharmony_ci if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | 3373bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE)) 3374bf215546Sopenharmony_ci radv_emit_culling(cmd_buffer, states); 3375bf215546Sopenharmony_ci 3376bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) 3377bf215546Sopenharmony_ci radv_emit_primitive_topology(cmd_buffer); 3378bf215546Sopenharmony_ci 3379bf215546Sopenharmony_ci if (states & 3380bf215546Sopenharmony_ci (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE | 3381bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE | 3382bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)) 3383bf215546Sopenharmony_ci radv_emit_depth_control(cmd_buffer, states); 3384bf215546Sopenharmony_ci 3385bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP) 3386bf215546Sopenharmony_ci radv_emit_stencil_control(cmd_buffer); 3387bf215546Sopenharmony_ci 3388bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE) 3389bf215546Sopenharmony_ci radv_emit_fragment_shading_rate(cmd_buffer); 3390bf215546Sopenharmony_ci 3391bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) 3392bf215546Sopenharmony_ci radv_emit_primitive_restart_enable(cmd_buffer); 3393bf215546Sopenharmony_ci 3394bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) 3395bf215546Sopenharmony_ci radv_emit_rasterizer_discard_enable(cmd_buffer); 3396bf215546Sopenharmony_ci 3397bf215546Sopenharmony_ci if (states & 
RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP) 3398bf215546Sopenharmony_ci radv_emit_logic_op(cmd_buffer); 3399bf215546Sopenharmony_ci 3400bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE) 3401bf215546Sopenharmony_ci radv_emit_color_write_enable(cmd_buffer); 3402bf215546Sopenharmony_ci 3403bf215546Sopenharmony_ci if (states & RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT) 3404bf215546Sopenharmony_ci radv_emit_vertex_input(cmd_buffer, pipeline_is_dirty); 3405bf215546Sopenharmony_ci 3406bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~states; 3407bf215546Sopenharmony_ci} 3408bf215546Sopenharmony_ci 3409bf215546Sopenharmony_cistatic void 3410bf215546Sopenharmony_ciradv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point) 3411bf215546Sopenharmony_ci{ 3412bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state = 3413bf215546Sopenharmony_ci radv_get_descriptors_state(cmd_buffer, bind_point); 3414bf215546Sopenharmony_ci struct radv_descriptor_set *set = (struct radv_descriptor_set *)&descriptors_state->push_set.set; 3415bf215546Sopenharmony_ci unsigned bo_offset; 3416bf215546Sopenharmony_ci 3417bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, set->header.mapped_ptr, 3418bf215546Sopenharmony_ci &bo_offset)) 3419bf215546Sopenharmony_ci return; 3420bf215546Sopenharmony_ci 3421bf215546Sopenharmony_ci set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 3422bf215546Sopenharmony_ci set->header.va += bo_offset; 3423bf215546Sopenharmony_ci} 3424bf215546Sopenharmony_ci 3425bf215546Sopenharmony_cistatic void 3426bf215546Sopenharmony_ciradv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer, 3427bf215546Sopenharmony_ci struct radv_pipeline *pipeline, VkPipelineBindPoint bind_point) 3428bf215546Sopenharmony_ci{ 3429bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state = 3430bf215546Sopenharmony_ci radv_get_descriptors_state(cmd_buffer, 
bind_point); 3431bf215546Sopenharmony_ci uint32_t size = MAX_SETS * 4; 3432bf215546Sopenharmony_ci uint32_t offset; 3433bf215546Sopenharmony_ci void *ptr; 3434bf215546Sopenharmony_ci 3435bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr)) 3436bf215546Sopenharmony_ci return; 3437bf215546Sopenharmony_ci 3438bf215546Sopenharmony_ci for (unsigned i = 0; i < MAX_SETS; i++) { 3439bf215546Sopenharmony_ci uint32_t *uptr = ((uint32_t *)ptr) + i; 3440bf215546Sopenharmony_ci uint64_t set_va = 0; 3441bf215546Sopenharmony_ci struct radv_descriptor_set *set = descriptors_state->sets[i]; 3442bf215546Sopenharmony_ci if (descriptors_state->valid & (1u << i)) 3443bf215546Sopenharmony_ci set_va = set->header.va; 3444bf215546Sopenharmony_ci uptr[0] = set_va & 0xffffffff; 3445bf215546Sopenharmony_ci } 3446bf215546Sopenharmony_ci 3447bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 3448bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 3449bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 3450bf215546Sopenharmony_ci va += offset; 3451bf215546Sopenharmony_ci 3452bf215546Sopenharmony_ci if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) { 3453bf215546Sopenharmony_ci struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline); 3454bf215546Sopenharmony_ci 3455bf215546Sopenharmony_ci if (pipeline->shaders[MESA_SHADER_VERTEX]) 3456bf215546Sopenharmony_ci radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_VERTEX, 3457bf215546Sopenharmony_ci AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 3458bf215546Sopenharmony_ci 3459bf215546Sopenharmony_ci if (pipeline->shaders[MESA_SHADER_FRAGMENT]) 3460bf215546Sopenharmony_ci radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_FRAGMENT, 3461bf215546Sopenharmony_ci AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 3462bf215546Sopenharmony_ci 3463bf215546Sopenharmony_ci if (radv_pipeline_has_stage(graphics_pipeline, 
MESA_SHADER_MESH)) 3464bf215546Sopenharmony_ci radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_MESH, 3465bf215546Sopenharmony_ci AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 3466bf215546Sopenharmony_ci 3467bf215546Sopenharmony_ci if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TASK)) 3468bf215546Sopenharmony_ci radv_emit_userdata_address(device, cmd_buffer->ace_internal.cs, pipeline, MESA_SHADER_TASK, 3469bf215546Sopenharmony_ci AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 3470bf215546Sopenharmony_ci 3471bf215546Sopenharmony_ci if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_GEOMETRY)) 3472bf215546Sopenharmony_ci radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_GEOMETRY, 3473bf215546Sopenharmony_ci AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 3474bf215546Sopenharmony_ci 3475bf215546Sopenharmony_ci if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TESS_CTRL)) 3476bf215546Sopenharmony_ci radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_TESS_CTRL, 3477bf215546Sopenharmony_ci AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 3478bf215546Sopenharmony_ci 3479bf215546Sopenharmony_ci if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TESS_CTRL)) 3480bf215546Sopenharmony_ci radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_TESS_EVAL, 3481bf215546Sopenharmony_ci AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 3482bf215546Sopenharmony_ci } else { 3483bf215546Sopenharmony_ci radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_COMPUTE, 3484bf215546Sopenharmony_ci AC_UD_INDIRECT_DESCRIPTOR_SETS, va); 3485bf215546Sopenharmony_ci } 3486bf215546Sopenharmony_ci} 3487bf215546Sopenharmony_ci 3488bf215546Sopenharmony_cistatic void 3489bf215546Sopenharmony_ciradv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, 3490bf215546Sopenharmony_ci struct radv_pipeline *pipeline, VkPipelineBindPoint bind_point) 3491bf215546Sopenharmony_ci{ 3492bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state = 
3493bf215546Sopenharmony_ci radv_get_descriptors_state(cmd_buffer, bind_point); 3494bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 3495bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 3496bf215546Sopenharmony_ci bool flush_indirect_descriptors; 3497bf215546Sopenharmony_ci 3498bf215546Sopenharmony_ci if (!descriptors_state->dirty) 3499bf215546Sopenharmony_ci return; 3500bf215546Sopenharmony_ci 3501bf215546Sopenharmony_ci if (descriptors_state->push_dirty) 3502bf215546Sopenharmony_ci radv_flush_push_descriptors(cmd_buffer, bind_point); 3503bf215546Sopenharmony_ci 3504bf215546Sopenharmony_ci flush_indirect_descriptors = pipeline->need_indirect_descriptor_sets; 3505bf215546Sopenharmony_ci 3506bf215546Sopenharmony_ci if (flush_indirect_descriptors) 3507bf215546Sopenharmony_ci radv_flush_indirect_descriptor_sets(cmd_buffer, pipeline, bind_point); 3508bf215546Sopenharmony_ci 3509bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = 3510bf215546Sopenharmony_ci radeon_check_space(device->ws, cs, MAX_SETS * MESA_VULKAN_SHADER_STAGES * 4); 3511bf215546Sopenharmony_ci 3512bf215546Sopenharmony_ci if (stages & VK_SHADER_STAGE_COMPUTE_BIT) { 3513bf215546Sopenharmony_ci radv_emit_descriptor_pointers(device, cs, pipeline, descriptors_state, MESA_SHADER_COMPUTE); 3514bf215546Sopenharmony_ci } else { 3515bf215546Sopenharmony_ci radv_foreach_stage(stage, stages & ~VK_SHADER_STAGE_TASK_BIT_NV) 3516bf215546Sopenharmony_ci { 3517bf215546Sopenharmony_ci if (!cmd_buffer->state.graphics_pipeline->base.shaders[stage]) 3518bf215546Sopenharmony_ci continue; 3519bf215546Sopenharmony_ci 3520bf215546Sopenharmony_ci radv_emit_descriptor_pointers(device, cs, pipeline, descriptors_state, stage); 3521bf215546Sopenharmony_ci } 3522bf215546Sopenharmony_ci 3523bf215546Sopenharmony_ci if (stages & VK_SHADER_STAGE_TASK_BIT_NV) { 3524bf215546Sopenharmony_ci radv_emit_descriptor_pointers(device, cmd_buffer->ace_internal.cs, pipeline, 3525bf215546Sopenharmony_ci 
descriptors_state, MESA_SHADER_TASK); 3526bf215546Sopenharmony_ci } 3527bf215546Sopenharmony_ci } 3528bf215546Sopenharmony_ci 3529bf215546Sopenharmony_ci descriptors_state->dirty = 0; 3530bf215546Sopenharmony_ci descriptors_state->push_dirty = false; 3531bf215546Sopenharmony_ci 3532bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 3533bf215546Sopenharmony_ci 3534bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->trace_bo)) 3535bf215546Sopenharmony_ci radv_save_descriptors(cmd_buffer, bind_point); 3536bf215546Sopenharmony_ci} 3537bf215546Sopenharmony_ci 3538bf215546Sopenharmony_cistatic bool 3539bf215546Sopenharmony_ciradv_shader_loads_push_constants(struct radv_pipeline *pipeline, gl_shader_stage stage) 3540bf215546Sopenharmony_ci{ 3541bf215546Sopenharmony_ci struct radv_userdata_info *loc = 3542bf215546Sopenharmony_ci radv_lookup_user_sgpr(pipeline, stage, AC_UD_PUSH_CONSTANTS); 3543bf215546Sopenharmony_ci return loc->sgpr_idx != -1; 3544bf215546Sopenharmony_ci} 3545bf215546Sopenharmony_ci 3546bf215546Sopenharmony_cistatic void 3547bf215546Sopenharmony_ciradv_emit_all_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs, 3548bf215546Sopenharmony_ci struct radv_pipeline *pipeline, gl_shader_stage stage, 3549bf215546Sopenharmony_ci uint32_t *values, bool *need_push_constants) 3550bf215546Sopenharmony_ci{ 3551bf215546Sopenharmony_ci const struct radv_shader *shader = radv_get_shader(pipeline, stage); 3552bf215546Sopenharmony_ci if (!shader) 3553bf215546Sopenharmony_ci return; 3554bf215546Sopenharmony_ci 3555bf215546Sopenharmony_ci *need_push_constants |= radv_shader_loads_push_constants(pipeline, stage); 3556bf215546Sopenharmony_ci 3557bf215546Sopenharmony_ci const uint64_t mask = shader->info.inline_push_constant_mask; 3558bf215546Sopenharmony_ci if (!mask) 3559bf215546Sopenharmony_ci return; 3560bf215546Sopenharmony_ci 3561bf215546Sopenharmony_ci const uint8_t base = ffs(mask) - 1; 3562bf215546Sopenharmony_ci if (mask == 
u_bit_consecutive64(base, util_last_bit64(mask) - base)) { 3563bf215546Sopenharmony_ci /* consecutive inline push constants */ 3564bf215546Sopenharmony_ci radv_emit_inline_push_consts(device, cs, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS, 3565bf215546Sopenharmony_ci values + base); 3566bf215546Sopenharmony_ci } else { 3567bf215546Sopenharmony_ci /* sparse inline push constants */ 3568bf215546Sopenharmony_ci uint32_t consts[AC_MAX_INLINE_PUSH_CONSTS]; 3569bf215546Sopenharmony_ci unsigned num_consts = 0; 3570bf215546Sopenharmony_ci u_foreach_bit64 (idx, mask) 3571bf215546Sopenharmony_ci consts[num_consts++] = values[idx]; 3572bf215546Sopenharmony_ci radv_emit_inline_push_consts(device, cs, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS, 3573bf215546Sopenharmony_ci consts); 3574bf215546Sopenharmony_ci } 3575bf215546Sopenharmony_ci} 3576bf215546Sopenharmony_ci 3577bf215546Sopenharmony_cistatic void 3578bf215546Sopenharmony_ciradv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages, 3579bf215546Sopenharmony_ci struct radv_pipeline *pipeline, VkPipelineBindPoint bind_point) 3580bf215546Sopenharmony_ci{ 3581bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 3582bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 3583bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state = 3584bf215546Sopenharmony_ci radv_get_descriptors_state(cmd_buffer, bind_point); 3585bf215546Sopenharmony_ci struct radv_shader *shader, *prev_shader; 3586bf215546Sopenharmony_ci bool need_push_constants = false; 3587bf215546Sopenharmony_ci unsigned offset; 3588bf215546Sopenharmony_ci void *ptr; 3589bf215546Sopenharmony_ci uint64_t va; 3590bf215546Sopenharmony_ci uint32_t internal_stages; 3591bf215546Sopenharmony_ci uint32_t dirty_stages = 0; 3592bf215546Sopenharmony_ci 3593bf215546Sopenharmony_ci stages &= cmd_buffer->push_constant_stages; 3594bf215546Sopenharmony_ci if (!stages || (!pipeline->push_constant_size && 
!pipeline->dynamic_offset_count)) 3595bf215546Sopenharmony_ci return; 3596bf215546Sopenharmony_ci 3597bf215546Sopenharmony_ci internal_stages = stages; 3598bf215546Sopenharmony_ci switch (bind_point) { 3599bf215546Sopenharmony_ci case VK_PIPELINE_BIND_POINT_GRAPHICS: 3600bf215546Sopenharmony_ci break; 3601bf215546Sopenharmony_ci case VK_PIPELINE_BIND_POINT_COMPUTE: 3602bf215546Sopenharmony_ci dirty_stages = RADV_RT_STAGE_BITS; 3603bf215546Sopenharmony_ci break; 3604bf215546Sopenharmony_ci case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: 3605bf215546Sopenharmony_ci internal_stages = VK_SHADER_STAGE_COMPUTE_BIT; 3606bf215546Sopenharmony_ci dirty_stages = VK_SHADER_STAGE_COMPUTE_BIT; 3607bf215546Sopenharmony_ci break; 3608bf215546Sopenharmony_ci default: 3609bf215546Sopenharmony_ci unreachable("Unhandled bind point"); 3610bf215546Sopenharmony_ci } 3611bf215546Sopenharmony_ci 3612bf215546Sopenharmony_ci radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_NV) 3613bf215546Sopenharmony_ci { 3614bf215546Sopenharmony_ci radv_emit_all_inline_push_consts( 3615bf215546Sopenharmony_ci device, cs, pipeline, stage, (uint32_t *)cmd_buffer->push_constants, &need_push_constants); 3616bf215546Sopenharmony_ci } 3617bf215546Sopenharmony_ci 3618bf215546Sopenharmony_ci if (internal_stages & VK_SHADER_STAGE_TASK_BIT_NV) { 3619bf215546Sopenharmony_ci radv_emit_all_inline_push_consts(device, cmd_buffer->ace_internal.cs, pipeline, 3620bf215546Sopenharmony_ci MESA_SHADER_TASK, (uint32_t *)cmd_buffer->push_constants, 3621bf215546Sopenharmony_ci &need_push_constants); 3622bf215546Sopenharmony_ci } 3623bf215546Sopenharmony_ci 3624bf215546Sopenharmony_ci if (need_push_constants) { 3625bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_alloc( 3626bf215546Sopenharmony_ci cmd_buffer, pipeline->push_constant_size + 16 * pipeline->dynamic_offset_count, &offset, 3627bf215546Sopenharmony_ci &ptr)) 3628bf215546Sopenharmony_ci return; 3629bf215546Sopenharmony_ci 3630bf215546Sopenharmony_ci 
memcpy(ptr, cmd_buffer->push_constants, pipeline->push_constant_size); 3631bf215546Sopenharmony_ci memcpy((char *)ptr + pipeline->push_constant_size, descriptors_state->dynamic_buffers, 3632bf215546Sopenharmony_ci 16 * pipeline->dynamic_offset_count); 3633bf215546Sopenharmony_ci 3634bf215546Sopenharmony_ci va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 3635bf215546Sopenharmony_ci va += offset; 3636bf215546Sopenharmony_ci 3637bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = 3638bf215546Sopenharmony_ci radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MESA_VULKAN_SHADER_STAGES * 4); 3639bf215546Sopenharmony_ci 3640bf215546Sopenharmony_ci prev_shader = NULL; 3641bf215546Sopenharmony_ci radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_NV) 3642bf215546Sopenharmony_ci { 3643bf215546Sopenharmony_ci shader = radv_get_shader(pipeline, stage); 3644bf215546Sopenharmony_ci 3645bf215546Sopenharmony_ci /* Avoid redundantly emitting the address for merged stages. 
*/ 3646bf215546Sopenharmony_ci if (shader && shader != prev_shader) { 3647bf215546Sopenharmony_ci radv_emit_userdata_address(device, cs, pipeline, stage, AC_UD_PUSH_CONSTANTS, va); 3648bf215546Sopenharmony_ci 3649bf215546Sopenharmony_ci prev_shader = shader; 3650bf215546Sopenharmony_ci } 3651bf215546Sopenharmony_ci } 3652bf215546Sopenharmony_ci 3653bf215546Sopenharmony_ci if (internal_stages & VK_SHADER_STAGE_TASK_BIT_NV) { 3654bf215546Sopenharmony_ci radv_emit_userdata_address(device, cmd_buffer->ace_internal.cs, pipeline, MESA_SHADER_TASK, 3655bf215546Sopenharmony_ci AC_UD_PUSH_CONSTANTS, va); 3656bf215546Sopenharmony_ci } 3657bf215546Sopenharmony_ci 3658bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 3659bf215546Sopenharmony_ci } 3660bf215546Sopenharmony_ci 3661bf215546Sopenharmony_ci cmd_buffer->push_constant_stages &= ~stages; 3662bf215546Sopenharmony_ci cmd_buffer->push_constant_stages |= dirty_stages; 3663bf215546Sopenharmony_ci} 3664bf215546Sopenharmony_ci 3665bf215546Sopenharmony_cienum radv_dst_sel { 3666bf215546Sopenharmony_ci DST_SEL_0001 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_0) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_0) | 3667bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_0) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_1), 3668bf215546Sopenharmony_ci DST_SEL_X001 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_0) | 3669bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_0) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_1), 3670bf215546Sopenharmony_ci DST_SEL_XY01 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3671bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_0) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_1), 3672bf215546Sopenharmony_ci DST_SEL_XYZ1 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3673bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_1), 3674bf215546Sopenharmony_ci 
DST_SEL_XYZW = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3675bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W), 3676bf215546Sopenharmony_ci DST_SEL_ZYXW = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3677bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W), 3678bf215546Sopenharmony_ci}; 3679bf215546Sopenharmony_ci 3680bf215546Sopenharmony_cistatic const uint32_t data_format_dst_sel[] = { 3681bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_INVALID] = DST_SEL_0001, 3682bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_8] = DST_SEL_X001, 3683bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_16] = DST_SEL_X001, 3684bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_8_8] = DST_SEL_XY01, 3685bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_32] = DST_SEL_X001, 3686bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_16_16] = DST_SEL_XY01, 3687bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_10_11_11] = DST_SEL_XYZ1, 3688bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_11_11_10] = DST_SEL_XYZ1, 3689bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_10_10_10_2] = DST_SEL_XYZW, 3690bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_2_10_10_10] = DST_SEL_XYZW, 3691bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_8_8_8_8] = DST_SEL_XYZW, 3692bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_32_32] = DST_SEL_XY01, 3693bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_16_16_16_16] = DST_SEL_XYZW, 3694bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_32_32_32] = DST_SEL_XYZ1, 3695bf215546Sopenharmony_ci [V_008F0C_BUF_DATA_FORMAT_32_32_32_32] = DST_SEL_XYZW, 3696bf215546Sopenharmony_ci}; 3697bf215546Sopenharmony_ci 3698bf215546Sopenharmony_civoid 3699bf215546Sopenharmony_ciradv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer, 3700bf215546Sopenharmony_ci const struct radv_graphics_pipeline 
*pipeline, 3701bf215546Sopenharmony_ci bool full_null_descriptors, void *vb_ptr) 3702bf215546Sopenharmony_ci{ 3703bf215546Sopenharmony_ci struct radv_shader *vs_shader = radv_get_shader(&pipeline->base, MESA_SHADER_VERTEX); 3704bf215546Sopenharmony_ci enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level; 3705bf215546Sopenharmony_ci unsigned desc_index = 0; 3706bf215546Sopenharmony_ci uint32_t mask = pipeline->vb_desc_usage_mask; 3707bf215546Sopenharmony_ci uint64_t va; 3708bf215546Sopenharmony_ci const struct radv_vs_input_state *vs_state = 3709bf215546Sopenharmony_ci vs_shader->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL; 3710bf215546Sopenharmony_ci assert(!vs_state || pipeline->use_per_attribute_vb_descs); 3711bf215546Sopenharmony_ci 3712bf215546Sopenharmony_ci while (mask) { 3713bf215546Sopenharmony_ci unsigned i = u_bit_scan(&mask); 3714bf215546Sopenharmony_ci uint32_t *desc = &((uint32_t *)vb_ptr)[desc_index++ * 4]; 3715bf215546Sopenharmony_ci uint32_t offset, rsrc_word3; 3716bf215546Sopenharmony_ci unsigned binding = 3717bf215546Sopenharmony_ci vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i] 3718bf215546Sopenharmony_ci : (pipeline->use_per_attribute_vb_descs ? pipeline->attrib_bindings[i] : i); 3719bf215546Sopenharmony_ci struct radv_buffer *buffer = cmd_buffer->vertex_binding_buffers[binding]; 3720bf215546Sopenharmony_ci unsigned num_records; 3721bf215546Sopenharmony_ci unsigned stride; 3722bf215546Sopenharmony_ci 3723bf215546Sopenharmony_ci if (vs_state) { 3724bf215546Sopenharmony_ci unsigned format = vs_state->formats[i]; 3725bf215546Sopenharmony_ci unsigned dfmt = format & 0xf; 3726bf215546Sopenharmony_ci unsigned nfmt = (format >> 4) & 0x7; 3727bf215546Sopenharmony_ci 3728bf215546Sopenharmony_ci rsrc_word3 = vs_state->post_shuffle & (1u << i) ? 
DST_SEL_ZYXW : data_format_dst_sel[dfmt]; 3729bf215546Sopenharmony_ci 3730bf215546Sopenharmony_ci if (chip >= GFX10) 3731bf215546Sopenharmony_ci rsrc_word3 |= S_008F0C_FORMAT(ac_get_tbuffer_format(chip, dfmt, nfmt)); 3732bf215546Sopenharmony_ci else 3733bf215546Sopenharmony_ci rsrc_word3 |= S_008F0C_NUM_FORMAT(nfmt) | S_008F0C_DATA_FORMAT(dfmt); 3734bf215546Sopenharmony_ci } else { 3735bf215546Sopenharmony_ci if (chip >= GFX10) 3736bf215546Sopenharmony_ci rsrc_word3 = DST_SEL_XYZW | S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT); 3737bf215546Sopenharmony_ci else 3738bf215546Sopenharmony_ci rsrc_word3 = DST_SEL_XYZW | S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) | 3739bf215546Sopenharmony_ci S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 3740bf215546Sopenharmony_ci } 3741bf215546Sopenharmony_ci 3742bf215546Sopenharmony_ci if (pipeline->uses_dynamic_stride) { 3743bf215546Sopenharmony_ci stride = cmd_buffer->vertex_bindings[binding].stride; 3744bf215546Sopenharmony_ci } else { 3745bf215546Sopenharmony_ci stride = pipeline->binding_stride[binding]; 3746bf215546Sopenharmony_ci } 3747bf215546Sopenharmony_ci 3748bf215546Sopenharmony_ci if (!buffer) { 3749bf215546Sopenharmony_ci if (full_null_descriptors) { 3750bf215546Sopenharmony_ci /* Put all the info in for the DGC generation shader in case the VBO gets overridden. */ 3751bf215546Sopenharmony_ci desc[0] = 0; 3752bf215546Sopenharmony_ci desc[1] = S_008F04_STRIDE(stride); 3753bf215546Sopenharmony_ci desc[2] = 0; 3754bf215546Sopenharmony_ci desc[3] = rsrc_word3; 3755bf215546Sopenharmony_ci } else if (vs_state) { 3756bf215546Sopenharmony_ci /* Stride needs to be non-zero on GFX9, or else bounds checking is disabled. We need 3757bf215546Sopenharmony_ci * to include the format/word3 so that the alpha channel is 1 for formats without an 3758bf215546Sopenharmony_ci * alpha channel. 
3759bf215546Sopenharmony_ci */ 3760bf215546Sopenharmony_ci desc[0] = 0; 3761bf215546Sopenharmony_ci desc[1] = S_008F04_STRIDE(16); 3762bf215546Sopenharmony_ci desc[2] = 0; 3763bf215546Sopenharmony_ci desc[3] = rsrc_word3; 3764bf215546Sopenharmony_ci } else { 3765bf215546Sopenharmony_ci memset(desc, 0, 4 * 4); 3766bf215546Sopenharmony_ci } 3767bf215546Sopenharmony_ci 3768bf215546Sopenharmony_ci continue; 3769bf215546Sopenharmony_ci } 3770bf215546Sopenharmony_ci 3771bf215546Sopenharmony_ci va = radv_buffer_get_va(buffer->bo); 3772bf215546Sopenharmony_ci 3773bf215546Sopenharmony_ci offset = cmd_buffer->vertex_bindings[binding].offset; 3774bf215546Sopenharmony_ci va += offset + buffer->offset; 3775bf215546Sopenharmony_ci if (vs_state) 3776bf215546Sopenharmony_ci va += vs_state->offsets[i]; 3777bf215546Sopenharmony_ci 3778bf215546Sopenharmony_ci if (cmd_buffer->vertex_bindings[binding].size) { 3779bf215546Sopenharmony_ci num_records = cmd_buffer->vertex_bindings[binding].size; 3780bf215546Sopenharmony_ci } else { 3781bf215546Sopenharmony_ci num_records = vk_buffer_range(&buffer->vk, offset, VK_WHOLE_SIZE); 3782bf215546Sopenharmony_ci } 3783bf215546Sopenharmony_ci 3784bf215546Sopenharmony_ci if (pipeline->use_per_attribute_vb_descs) { 3785bf215546Sopenharmony_ci uint32_t attrib_end = 3786bf215546Sopenharmony_ci vs_state ? 
vs_state->offsets[i] + vs_state->format_sizes[i] : pipeline->attrib_ends[i]; 3787bf215546Sopenharmony_ci 3788bf215546Sopenharmony_ci if (num_records < attrib_end) { 3789bf215546Sopenharmony_ci num_records = 0; /* not enough space for one vertex */ 3790bf215546Sopenharmony_ci } else if (stride == 0) { 3791bf215546Sopenharmony_ci num_records = 1; /* only one vertex */ 3792bf215546Sopenharmony_ci } else { 3793bf215546Sopenharmony_ci num_records = (num_records - attrib_end) / stride + 1; 3794bf215546Sopenharmony_ci /* If attrib_offset>stride, then the compiler will increase the vertex index by 3795bf215546Sopenharmony_ci * attrib_offset/stride and decrease the offset by attrib_offset%stride. This is 3796bf215546Sopenharmony_ci * only allowed with static strides. 3797bf215546Sopenharmony_ci */ 3798bf215546Sopenharmony_ci num_records += pipeline->attrib_index_offset[i]; 3799bf215546Sopenharmony_ci } 3800bf215546Sopenharmony_ci 3801bf215546Sopenharmony_ci /* GFX10 uses OOB_SELECT_RAW if stride==0, so convert num_records from elements into 3802bf215546Sopenharmony_ci * into bytes in that case. GFX8 always uses bytes. 3803bf215546Sopenharmony_ci */ 3804bf215546Sopenharmony_ci if (num_records && (chip == GFX8 || (chip != GFX9 && !stride))) { 3805bf215546Sopenharmony_ci num_records = (num_records - 1) * stride + attrib_end; 3806bf215546Sopenharmony_ci } else if (!num_records) { 3807bf215546Sopenharmony_ci /* On GFX9, it seems bounds checking is disabled if both 3808bf215546Sopenharmony_ci * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and 3809bf215546Sopenharmony_ci * GFX10.3 but it doesn't hurt. 3810bf215546Sopenharmony_ci */ 3811bf215546Sopenharmony_ci if (full_null_descriptors) { 3812bf215546Sopenharmony_ci /* Put all the info in for the DGC generation shader in case the VBO gets overridden. 
3813bf215546Sopenharmony_ci */ 3814bf215546Sopenharmony_ci desc[0] = 0; 3815bf215546Sopenharmony_ci desc[1] = S_008F04_STRIDE(stride); 3816bf215546Sopenharmony_ci desc[2] = 0; 3817bf215546Sopenharmony_ci desc[3] = rsrc_word3; 3818bf215546Sopenharmony_ci } else if (vs_state) { 3819bf215546Sopenharmony_ci desc[0] = 0; 3820bf215546Sopenharmony_ci desc[1] = S_008F04_STRIDE(16); 3821bf215546Sopenharmony_ci desc[2] = 0; 3822bf215546Sopenharmony_ci desc[3] = rsrc_word3; 3823bf215546Sopenharmony_ci } else { 3824bf215546Sopenharmony_ci memset(desc, 0, 16); 3825bf215546Sopenharmony_ci } 3826bf215546Sopenharmony_ci 3827bf215546Sopenharmony_ci continue; 3828bf215546Sopenharmony_ci } 3829bf215546Sopenharmony_ci } else { 3830bf215546Sopenharmony_ci if (chip != GFX8 && stride) 3831bf215546Sopenharmony_ci num_records = DIV_ROUND_UP(num_records, stride); 3832bf215546Sopenharmony_ci } 3833bf215546Sopenharmony_ci 3834bf215546Sopenharmony_ci if (chip >= GFX10) { 3835bf215546Sopenharmony_ci /* OOB_SELECT chooses the out-of-bounds check: 3836bf215546Sopenharmony_ci * - 1: index >= NUM_RECORDS (Structured) 3837bf215546Sopenharmony_ci * - 3: offset >= NUM_RECORDS (Raw) 3838bf215546Sopenharmony_ci */ 3839bf215546Sopenharmony_ci int oob_select = stride ? 
V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW; 3840bf215546Sopenharmony_ci rsrc_word3 |= S_008F0C_OOB_SELECT(oob_select) | S_008F0C_RESOURCE_LEVEL(chip < GFX11); 3841bf215546Sopenharmony_ci } 3842bf215546Sopenharmony_ci 3843bf215546Sopenharmony_ci desc[0] = va; 3844bf215546Sopenharmony_ci desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride); 3845bf215546Sopenharmony_ci desc[2] = num_records; 3846bf215546Sopenharmony_ci desc[3] = rsrc_word3; 3847bf215546Sopenharmony_ci } 3848bf215546Sopenharmony_ci} 3849bf215546Sopenharmony_ci 3850bf215546Sopenharmony_cistatic void 3851bf215546Sopenharmony_ciradv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty) 3852bf215546Sopenharmony_ci{ 3853bf215546Sopenharmony_ci if ((pipeline_is_dirty || (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) && 3854bf215546Sopenharmony_ci cmd_buffer->state.graphics_pipeline->vb_desc_usage_mask) { 3855bf215546Sopenharmony_ci /* Mesh shaders don't have vertex descriptors. 
*/ 3856bf215546Sopenharmony_ci assert(!cmd_buffer->state.mesh_shading); 3857bf215546Sopenharmony_ci 3858bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 3859bf215546Sopenharmony_ci unsigned vb_offset; 3860bf215546Sopenharmony_ci void *vb_ptr; 3861bf215546Sopenharmony_ci uint64_t va; 3862bf215546Sopenharmony_ci 3863bf215546Sopenharmony_ci /* allocate some descriptor state for vertex buffers */ 3864bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_alloc(cmd_buffer, pipeline->vb_desc_alloc_size, &vb_offset, 3865bf215546Sopenharmony_ci &vb_ptr)) 3866bf215546Sopenharmony_ci return; 3867bf215546Sopenharmony_ci 3868bf215546Sopenharmony_ci radv_write_vertex_descriptors(cmd_buffer, pipeline, false, vb_ptr); 3869bf215546Sopenharmony_ci 3870bf215546Sopenharmony_ci va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 3871bf215546Sopenharmony_ci va += vb_offset; 3872bf215546Sopenharmony_ci 3873bf215546Sopenharmony_ci radv_emit_userdata_address(cmd_buffer->device, cmd_buffer->cs, &pipeline->base, 3874bf215546Sopenharmony_ci MESA_SHADER_VERTEX, AC_UD_VS_VERTEX_BUFFERS, va); 3875bf215546Sopenharmony_ci 3876bf215546Sopenharmony_ci cmd_buffer->state.vb_va = va; 3877bf215546Sopenharmony_ci cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS; 3878bf215546Sopenharmony_ci 3879bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->trace_bo)) 3880bf215546Sopenharmony_ci radv_save_vertex_descriptors(cmd_buffer, (uintptr_t)vb_ptr); 3881bf215546Sopenharmony_ci } 3882bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER; 3883bf215546Sopenharmony_ci} 3884bf215546Sopenharmony_ci 3885bf215546Sopenharmony_cistatic void 3886bf215546Sopenharmony_ciradv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va) 3887bf215546Sopenharmony_ci{ 3888bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 3889bf215546Sopenharmony_ci struct 
radv_userdata_info *loc; 3890bf215546Sopenharmony_ci uint32_t base_reg; 3891bf215546Sopenharmony_ci 3892bf215546Sopenharmony_ci for (unsigned stage = 0; stage < MESA_VULKAN_SHADER_STAGES; ++stage) { 3893bf215546Sopenharmony_ci if (!radv_get_shader(&pipeline->base, stage)) 3894bf215546Sopenharmony_ci continue; 3895bf215546Sopenharmony_ci 3896bf215546Sopenharmony_ci loc = radv_lookup_user_sgpr(&pipeline->base, stage, AC_UD_STREAMOUT_BUFFERS); 3897bf215546Sopenharmony_ci if (loc->sgpr_idx == -1) 3898bf215546Sopenharmony_ci continue; 3899bf215546Sopenharmony_ci 3900bf215546Sopenharmony_ci base_reg = pipeline->base.user_data_0[stage]; 3901bf215546Sopenharmony_ci 3902bf215546Sopenharmony_ci radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va, 3903bf215546Sopenharmony_ci false); 3904bf215546Sopenharmony_ci } 3905bf215546Sopenharmony_ci 3906bf215546Sopenharmony_ci if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) { 3907bf215546Sopenharmony_ci loc = &pipeline->base.gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS]; 3908bf215546Sopenharmony_ci if (loc->sgpr_idx != -1) { 3909bf215546Sopenharmony_ci base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0; 3910bf215546Sopenharmony_ci 3911bf215546Sopenharmony_ci radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, 3912bf215546Sopenharmony_ci va, false); 3913bf215546Sopenharmony_ci } 3914bf215546Sopenharmony_ci } 3915bf215546Sopenharmony_ci} 3916bf215546Sopenharmony_ci 3917bf215546Sopenharmony_cistatic void 3918bf215546Sopenharmony_ciradv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer) 3919bf215546Sopenharmony_ci{ 3920bf215546Sopenharmony_ci if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) { 3921bf215546Sopenharmony_ci struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; 3922bf215546Sopenharmony_ci struct radv_streamout_state *so = &cmd_buffer->state.streamout; 3923bf215546Sopenharmony_ci 
unsigned so_offset; 3924bf215546Sopenharmony_ci void *so_ptr; 3925bf215546Sopenharmony_ci uint64_t va; 3926bf215546Sopenharmony_ci 3927bf215546Sopenharmony_ci /* Allocate some descriptor state for streamout buffers. */ 3928bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_alloc(cmd_buffer, MAX_SO_BUFFERS * 16, &so_offset, &so_ptr)) 3929bf215546Sopenharmony_ci return; 3930bf215546Sopenharmony_ci 3931bf215546Sopenharmony_ci for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) { 3932bf215546Sopenharmony_ci struct radv_buffer *buffer = sb[i].buffer; 3933bf215546Sopenharmony_ci uint32_t *desc = &((uint32_t *)so_ptr)[i * 4]; 3934bf215546Sopenharmony_ci 3935bf215546Sopenharmony_ci if (!(so->enabled_mask & (1 << i))) 3936bf215546Sopenharmony_ci continue; 3937bf215546Sopenharmony_ci 3938bf215546Sopenharmony_ci va = radv_buffer_get_va(buffer->bo) + buffer->offset; 3939bf215546Sopenharmony_ci 3940bf215546Sopenharmony_ci va += sb[i].offset; 3941bf215546Sopenharmony_ci 3942bf215546Sopenharmony_ci /* Set the descriptor. 3943bf215546Sopenharmony_ci * 3944bf215546Sopenharmony_ci * On GFX8, the format must be non-INVALID, otherwise 3945bf215546Sopenharmony_ci * the buffer will be considered not bound and store 3946bf215546Sopenharmony_ci * instructions will be no-ops. 3947bf215546Sopenharmony_ci */ 3948bf215546Sopenharmony_ci uint32_t size = 0xffffffff; 3949bf215546Sopenharmony_ci 3950bf215546Sopenharmony_ci /* Compute the correct buffer size for NGG streamout 3951bf215546Sopenharmony_ci * because it's used to determine the max emit per 3952bf215546Sopenharmony_ci * buffer. 
3953bf215546Sopenharmony_ci */ 3954bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->use_ngg_streamout) 3955bf215546Sopenharmony_ci size = buffer->vk.size - sb[i].offset; 3956bf215546Sopenharmony_ci 3957bf215546Sopenharmony_ci uint32_t rsrc_word3 = 3958bf215546Sopenharmony_ci S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 3959bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W); 3960bf215546Sopenharmony_ci 3961bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) { 3962bf215546Sopenharmony_ci rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) | 3963bf215546Sopenharmony_ci S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW); 3964bf215546Sopenharmony_ci } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) { 3965bf215546Sopenharmony_ci rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) | 3966bf215546Sopenharmony_ci S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1); 3967bf215546Sopenharmony_ci } else { 3968bf215546Sopenharmony_ci rsrc_word3 |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 3969bf215546Sopenharmony_ci } 3970bf215546Sopenharmony_ci 3971bf215546Sopenharmony_ci desc[0] = va; 3972bf215546Sopenharmony_ci desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); 3973bf215546Sopenharmony_ci desc[2] = size; 3974bf215546Sopenharmony_ci desc[3] = rsrc_word3; 3975bf215546Sopenharmony_ci } 3976bf215546Sopenharmony_ci 3977bf215546Sopenharmony_ci va = radv_buffer_get_va(cmd_buffer->upload.upload_bo); 3978bf215546Sopenharmony_ci va += so_offset; 3979bf215546Sopenharmony_ci 3980bf215546Sopenharmony_ci radv_emit_streamout_buffers(cmd_buffer, va); 3981bf215546Sopenharmony_ci } 3982bf215546Sopenharmony_ci 3983bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER; 3984bf215546Sopenharmony_ci} 3985bf215546Sopenharmony_ci 3986bf215546Sopenharmony_cistatic 
void 3987bf215546Sopenharmony_ciradv_flush_ngg_query_state(struct radv_cmd_buffer *cmd_buffer) 3988bf215546Sopenharmony_ci{ 3989bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 3990bf215546Sopenharmony_ci const unsigned stage = pipeline->last_vgt_api_stage; 3991bf215546Sopenharmony_ci struct radv_userdata_info *loc; 3992bf215546Sopenharmony_ci uint32_t ngg_query_state = 0; 3993bf215546Sopenharmony_ci uint32_t base_reg; 3994bf215546Sopenharmony_ci 3995bf215546Sopenharmony_ci loc = radv_lookup_user_sgpr(&pipeline->base, stage, AC_UD_NGG_QUERY_STATE); 3996bf215546Sopenharmony_ci if (loc->sgpr_idx == -1) 3997bf215546Sopenharmony_ci return; 3998bf215546Sopenharmony_ci 3999bf215546Sopenharmony_ci assert(pipeline->is_ngg); 4000bf215546Sopenharmony_ci 4001bf215546Sopenharmony_ci /* By default NGG queries are disabled but they are enabled if the command buffer has active GDS 4002bf215546Sopenharmony_ci * queries or if it's a secondary command buffer that inherits the number of generated 4003bf215546Sopenharmony_ci * primitives. 
4004bf215546Sopenharmony_ci */ 4005bf215546Sopenharmony_ci if (cmd_buffer->state.active_pipeline_gds_queries || 4006bf215546Sopenharmony_ci (cmd_buffer->state.inherited_pipeline_statistics & 4007bf215546Sopenharmony_ci VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT)) 4008bf215546Sopenharmony_ci ngg_query_state = 1; 4009bf215546Sopenharmony_ci 4010bf215546Sopenharmony_ci base_reg = pipeline->base.user_data_0[stage]; 4011bf215546Sopenharmony_ci assert(loc->sgpr_idx != -1); 4012bf215546Sopenharmony_ci 4013bf215546Sopenharmony_ci radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ngg_query_state); 4014bf215546Sopenharmony_ci} 4015bf215546Sopenharmony_ci 4016bf215546Sopenharmony_cistatic void 4017bf215546Sopenharmony_ciradv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer) 4018bf215546Sopenharmony_ci{ 4019bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 4020bf215546Sopenharmony_ci enum amd_gfx_level gfx_level = pipeline->base.device->physical_device->rad_info.gfx_level; 4021bf215546Sopenharmony_ci const unsigned stage = pipeline->last_vgt_api_stage; 4022bf215546Sopenharmony_ci struct radv_userdata_info *loc; 4023bf215546Sopenharmony_ci uint32_t vrs_rates = 0; 4024bf215546Sopenharmony_ci uint32_t base_reg; 4025bf215546Sopenharmony_ci 4026bf215546Sopenharmony_ci if (!pipeline->force_vrs_per_vertex) { 4027bf215546Sopenharmony_ci /* Un-set the SGPR index so we know to re-emit it later. 
*/ 4028bf215546Sopenharmony_ci cmd_buffer->state.last_vrs_rates_sgpr_idx = -1; 4029bf215546Sopenharmony_ci return; 4030bf215546Sopenharmony_ci } 4031bf215546Sopenharmony_ci 4032bf215546Sopenharmony_ci loc = radv_lookup_user_sgpr(&pipeline->base, stage, AC_UD_FORCE_VRS_RATES); 4033bf215546Sopenharmony_ci assert(loc->sgpr_idx != -1); 4034bf215546Sopenharmony_ci 4035bf215546Sopenharmony_ci base_reg = pipeline->base.user_data_0[stage]; 4036bf215546Sopenharmony_ci 4037bf215546Sopenharmony_ci switch (cmd_buffer->device->force_vrs) { 4038bf215546Sopenharmony_ci case RADV_FORCE_VRS_2x2: 4039bf215546Sopenharmony_ci vrs_rates = gfx_level >= GFX11 ? V_0283D0_VRS_SHADING_RATE_2X2 : (1u << 2) | (1u << 4); 4040bf215546Sopenharmony_ci break; 4041bf215546Sopenharmony_ci case RADV_FORCE_VRS_2x1: 4042bf215546Sopenharmony_ci vrs_rates = gfx_level >= GFX11 ? V_0283D0_VRS_SHADING_RATE_2X1 : (1u << 2) | (0u << 4); 4043bf215546Sopenharmony_ci break; 4044bf215546Sopenharmony_ci case RADV_FORCE_VRS_1x2: 4045bf215546Sopenharmony_ci vrs_rates = gfx_level >= GFX11 ? 
V_0283D0_VRS_SHADING_RATE_1X2 : (0u << 2) | (1u << 4); 4046bf215546Sopenharmony_ci break; 4047bf215546Sopenharmony_ci default: 4048bf215546Sopenharmony_ci break; 4049bf215546Sopenharmony_ci } 4050bf215546Sopenharmony_ci 4051bf215546Sopenharmony_ci if (cmd_buffer->state.last_vrs_rates != vrs_rates || 4052bf215546Sopenharmony_ci cmd_buffer->state.last_vrs_rates_sgpr_idx != loc->sgpr_idx) { 4053bf215546Sopenharmony_ci radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, vrs_rates); 4054bf215546Sopenharmony_ci } 4055bf215546Sopenharmony_ci 4056bf215546Sopenharmony_ci cmd_buffer->state.last_vrs_rates = vrs_rates; 4057bf215546Sopenharmony_ci cmd_buffer->state.last_vrs_rates_sgpr_idx = loc->sgpr_idx; 4058bf215546Sopenharmony_ci} 4059bf215546Sopenharmony_ci 4060bf215546Sopenharmony_cistatic void 4061bf215546Sopenharmony_ciradv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty) 4062bf215546Sopenharmony_ci{ 4063bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 4064bf215546Sopenharmony_ci 4065bf215546Sopenharmony_ci radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty); 4066bf215546Sopenharmony_ci radv_flush_streamout_descriptors(cmd_buffer); 4067bf215546Sopenharmony_ci 4068bf215546Sopenharmony_ci VkShaderStageFlags stages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_MESH_BIT_NV; 4069bf215546Sopenharmony_ci radv_flush_descriptors(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS); 4070bf215546Sopenharmony_ci radv_flush_constants(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS); 4071bf215546Sopenharmony_ci radv_flush_ngg_query_state(cmd_buffer); 4072bf215546Sopenharmony_ci radv_flush_force_vrs_state(cmd_buffer); 4073bf215546Sopenharmony_ci} 4074bf215546Sopenharmony_ci 4075bf215546Sopenharmony_cistruct radv_draw_info { 4076bf215546Sopenharmony_ci /** 4077bf215546Sopenharmony_ci * Number of vertices. 
4078bf215546Sopenharmony_ci */ 4079bf215546Sopenharmony_ci uint32_t count; 4080bf215546Sopenharmony_ci 4081bf215546Sopenharmony_ci /** 4082bf215546Sopenharmony_ci * First instance id. 4083bf215546Sopenharmony_ci */ 4084bf215546Sopenharmony_ci uint32_t first_instance; 4085bf215546Sopenharmony_ci 4086bf215546Sopenharmony_ci /** 4087bf215546Sopenharmony_ci * Number of instances. 4088bf215546Sopenharmony_ci */ 4089bf215546Sopenharmony_ci uint32_t instance_count; 4090bf215546Sopenharmony_ci 4091bf215546Sopenharmony_ci /** 4092bf215546Sopenharmony_ci * Whether it's an indexed draw. 4093bf215546Sopenharmony_ci */ 4094bf215546Sopenharmony_ci bool indexed; 4095bf215546Sopenharmony_ci 4096bf215546Sopenharmony_ci /** 4097bf215546Sopenharmony_ci * Indirect draw parameters resource. 4098bf215546Sopenharmony_ci */ 4099bf215546Sopenharmony_ci struct radv_buffer *indirect; 4100bf215546Sopenharmony_ci uint64_t indirect_offset; 4101bf215546Sopenharmony_ci uint32_t stride; 4102bf215546Sopenharmony_ci 4103bf215546Sopenharmony_ci /** 4104bf215546Sopenharmony_ci * Draw count parameters resource. 4105bf215546Sopenharmony_ci */ 4106bf215546Sopenharmony_ci struct radv_buffer *count_buffer; 4107bf215546Sopenharmony_ci uint64_t count_buffer_offset; 4108bf215546Sopenharmony_ci 4109bf215546Sopenharmony_ci /** 4110bf215546Sopenharmony_ci * Stream output parameters resource. 
4111bf215546Sopenharmony_ci */ 4112bf215546Sopenharmony_ci struct radv_buffer *strmout_buffer; 4113bf215546Sopenharmony_ci uint64_t strmout_buffer_offset; 4114bf215546Sopenharmony_ci}; 4115bf215546Sopenharmony_ci 4116bf215546Sopenharmony_cistatic uint32_t 4117bf215546Sopenharmony_ciradv_get_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer) 4118bf215546Sopenharmony_ci{ 4119bf215546Sopenharmony_ci uint32_t index_type = G_028A7C_INDEX_TYPE(cmd_buffer->state.index_type); 4120bf215546Sopenharmony_ci switch (index_type) { 4121bf215546Sopenharmony_ci case V_028A7C_VGT_INDEX_8: 4122bf215546Sopenharmony_ci return 0xffu; 4123bf215546Sopenharmony_ci case V_028A7C_VGT_INDEX_16: 4124bf215546Sopenharmony_ci return 0xffffu; 4125bf215546Sopenharmony_ci case V_028A7C_VGT_INDEX_32: 4126bf215546Sopenharmony_ci return 0xffffffffu; 4127bf215546Sopenharmony_ci default: 4128bf215546Sopenharmony_ci unreachable("invalid index type"); 4129bf215546Sopenharmony_ci } 4130bf215546Sopenharmony_ci} 4131bf215546Sopenharmony_ci 4132bf215546Sopenharmony_cistatic void 4133bf215546Sopenharmony_cisi_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw, 4134bf215546Sopenharmony_ci bool indirect_draw, bool count_from_stream_output, 4135bf215546Sopenharmony_ci uint32_t draw_vertex_count) 4136bf215546Sopenharmony_ci{ 4137bf215546Sopenharmony_ci struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; 4138bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 4139bf215546Sopenharmony_ci unsigned topology = state->dynamic.primitive_topology; 4140bf215546Sopenharmony_ci bool prim_restart_enable = state->dynamic.primitive_restart_enable; 4141bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 4142bf215546Sopenharmony_ci unsigned ia_multi_vgt_param; 4143bf215546Sopenharmony_ci 4144bf215546Sopenharmony_ci ia_multi_vgt_param = 4145bf215546Sopenharmony_ci si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, 
count_from_stream_output, 4146bf215546Sopenharmony_ci draw_vertex_count, topology, prim_restart_enable); 4147bf215546Sopenharmony_ci 4148bf215546Sopenharmony_ci if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) { 4149bf215546Sopenharmony_ci if (info->gfx_level == GFX9) { 4150bf215546Sopenharmony_ci radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, 4151bf215546Sopenharmony_ci R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param); 4152bf215546Sopenharmony_ci } else if (info->gfx_level >= GFX7) { 4153bf215546Sopenharmony_ci radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param); 4154bf215546Sopenharmony_ci } else { 4155bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param); 4156bf215546Sopenharmony_ci } 4157bf215546Sopenharmony_ci state->last_ia_multi_vgt_param = ia_multi_vgt_param; 4158bf215546Sopenharmony_ci } 4159bf215546Sopenharmony_ci} 4160bf215546Sopenharmony_ci 4161bf215546Sopenharmony_cistatic void 4162bf215546Sopenharmony_ciradv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info) 4163bf215546Sopenharmony_ci{ 4164bf215546Sopenharmony_ci struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info; 4165bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 4166bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 4167bf215546Sopenharmony_ci uint32_t topology = state->dynamic.primitive_topology; 4168bf215546Sopenharmony_ci bool disable_instance_packing = false; 4169bf215546Sopenharmony_ci 4170bf215546Sopenharmony_ci /* Draw state. */ 4171bf215546Sopenharmony_ci if (info->gfx_level < GFX10) { 4172bf215546Sopenharmony_ci si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect, 4173bf215546Sopenharmony_ci !!draw_info->strmout_buffer, 4174bf215546Sopenharmony_ci draw_info->indirect ? 
0 : draw_info->count); 4175bf215546Sopenharmony_ci } 4176bf215546Sopenharmony_ci 4177bf215546Sopenharmony_ci if (state->dynamic.primitive_restart_enable) { 4178bf215546Sopenharmony_ci uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer); 4179bf215546Sopenharmony_ci 4180bf215546Sopenharmony_ci if (primitive_reset_index != state->last_primitive_reset_index) { 4181bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index); 4182bf215546Sopenharmony_ci state->last_primitive_reset_index = primitive_reset_index; 4183bf215546Sopenharmony_ci } 4184bf215546Sopenharmony_ci } 4185bf215546Sopenharmony_ci 4186bf215546Sopenharmony_ci if (draw_info->strmout_buffer) { 4187bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo); 4188bf215546Sopenharmony_ci 4189bf215546Sopenharmony_ci va += draw_info->strmout_buffer->offset + draw_info->strmout_buffer_offset; 4190bf215546Sopenharmony_ci 4191bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride); 4192bf215546Sopenharmony_ci 4193bf215546Sopenharmony_ci if (info->gfx_level >= GFX10) { 4194bf215546Sopenharmony_ci /* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption 4195bf215546Sopenharmony_ci * (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+. 
4196bf215546Sopenharmony_ci */ 4197bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 4198bf215546Sopenharmony_ci radeon_emit(cs, 0); 4199bf215546Sopenharmony_ci 4200bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0)); 4201bf215546Sopenharmony_ci radeon_emit(cs, va); 4202bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 4203bf215546Sopenharmony_ci radeon_emit(cs, (R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE - SI_CONTEXT_REG_OFFSET) >> 2); 4204bf215546Sopenharmony_ci radeon_emit(cs, 1); /* 1 DWORD */ 4205bf215546Sopenharmony_ci } else { 4206bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 4207bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) | 4208bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 4209bf215546Sopenharmony_ci radeon_emit(cs, va); 4210bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 4211bf215546Sopenharmony_ci radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2); 4212bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 4213bf215546Sopenharmony_ci } 4214bf215546Sopenharmony_ci 4215bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo); 4216bf215546Sopenharmony_ci } 4217bf215546Sopenharmony_ci 4218bf215546Sopenharmony_ci /* RDNA2 is affected by a hardware bug when instance packing is enabled for adjacent primitive 4219bf215546Sopenharmony_ci * topologies and instance_count > 1, pipeline stats generated by GE are incorrect. It needs to 4220bf215546Sopenharmony_ci * be applied for indexed and non-indexed draws. 
4221bf215546Sopenharmony_ci */ 4222bf215546Sopenharmony_ci if (info->gfx_level == GFX10_3 && state->active_pipeline_queries > 0 && 4223bf215546Sopenharmony_ci (draw_info->instance_count > 1 || draw_info->indirect) && 4224bf215546Sopenharmony_ci (topology == V_008958_DI_PT_LINELIST_ADJ || topology == V_008958_DI_PT_LINESTRIP_ADJ || 4225bf215546Sopenharmony_ci topology == V_008958_DI_PT_TRILIST_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) { 4226bf215546Sopenharmony_ci disable_instance_packing = true; 4227bf215546Sopenharmony_ci } 4228bf215546Sopenharmony_ci 4229bf215546Sopenharmony_ci if ((draw_info->indexed && state->index_type != state->last_index_type) || 4230bf215546Sopenharmony_ci (info->gfx_level == GFX10_3 && 4231bf215546Sopenharmony_ci (state->last_index_type == -1 || 4232bf215546Sopenharmony_ci disable_instance_packing != G_028A7C_DISABLE_INSTANCE_PACKING(state->last_index_type)))) { 4233bf215546Sopenharmony_ci uint32_t index_type = state->index_type | S_028A7C_DISABLE_INSTANCE_PACKING(disable_instance_packing); 4234bf215546Sopenharmony_ci 4235bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { 4236bf215546Sopenharmony_ci radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs, 4237bf215546Sopenharmony_ci R_03090C_VGT_INDEX_TYPE, 2, index_type); 4238bf215546Sopenharmony_ci } else { 4239bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0)); 4240bf215546Sopenharmony_ci radeon_emit(cs, index_type); 4241bf215546Sopenharmony_ci } 4242bf215546Sopenharmony_ci 4243bf215546Sopenharmony_ci state->last_index_type = index_type; 4244bf215546Sopenharmony_ci } 4245bf215546Sopenharmony_ci} 4246bf215546Sopenharmony_ci 4247bf215546Sopenharmony_cistatic void 4248bf215546Sopenharmony_ciradv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stage_mask) 4249bf215546Sopenharmony_ci{ 4250bf215546Sopenharmony_ci /* For simplicity, if the barrier wants to wait for the task shader, 
4251bf215546Sopenharmony_ci * just make it wait for the mesh shader too. 4252bf215546Sopenharmony_ci */ 4253bf215546Sopenharmony_ci if (src_stage_mask & VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV) 4254bf215546Sopenharmony_ci src_stage_mask |= VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_NV; 4255bf215546Sopenharmony_ci 4256bf215546Sopenharmony_ci if (src_stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT | 4257bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_RESOLVE_BIT | 4258bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_BLIT_BIT | 4259bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_CLEAR_BIT)) { 4260bf215546Sopenharmony_ci /* Be conservative for now. */ 4261bf215546Sopenharmony_ci src_stage_mask |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT; 4262bf215546Sopenharmony_ci } 4263bf215546Sopenharmony_ci 4264bf215546Sopenharmony_ci if (src_stage_mask & 4265bf215546Sopenharmony_ci (VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | 4266bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR | 4267bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR | 4268bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | 4269bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) { 4270bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; 4271bf215546Sopenharmony_ci } 4272bf215546Sopenharmony_ci 4273bf215546Sopenharmony_ci if (src_stage_mask & 4274bf215546Sopenharmony_ci (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | 4275bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT | 4276bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | 4277bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) { 4278bf215546Sopenharmony_ci 
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH; 4279bf215546Sopenharmony_ci } else if (src_stage_mask & 4280bf215546Sopenharmony_ci (VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT | 4281bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | 4282bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | 4283bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | 4284bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | 4285bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_NV | 4286bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT | 4287bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT)) { 4288bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; 4289bf215546Sopenharmony_ci } 4290bf215546Sopenharmony_ci} 4291bf215546Sopenharmony_ci 4292bf215546Sopenharmony_cistatic bool 4293bf215546Sopenharmony_cican_skip_buffer_l2_flushes(struct radv_device *device) 4294bf215546Sopenharmony_ci{ 4295bf215546Sopenharmony_ci return device->physical_device->rad_info.gfx_level == GFX9 || 4296bf215546Sopenharmony_ci (device->physical_device->rad_info.gfx_level >= GFX10 && 4297bf215546Sopenharmony_ci !device->physical_device->rad_info.tcc_rb_non_coherent); 4298bf215546Sopenharmony_ci} 4299bf215546Sopenharmony_ci 4300bf215546Sopenharmony_ci/* 4301bf215546Sopenharmony_ci * In vulkan barriers have two kinds of operations: 4302bf215546Sopenharmony_ci * 4303bf215546Sopenharmony_ci * - visibility (implemented with radv_src_access_flush) 4304bf215546Sopenharmony_ci * - availability (implemented with radv_dst_access_flush) 4305bf215546Sopenharmony_ci * 4306bf215546Sopenharmony_ci * for a memory operation to observe the result of a previous memory operation 4307bf215546Sopenharmony_ci * one needs to do a visibility operation from the source memory and then an 4308bf215546Sopenharmony_ci * availability 
operation to the target memory. 4309bf215546Sopenharmony_ci * 4310bf215546Sopenharmony_ci * The complication is the availability and visibility operations do not need to 4311bf215546Sopenharmony_ci * be in the same barrier. 4312bf215546Sopenharmony_ci * 4313bf215546Sopenharmony_ci * The cleanest way to implement this is to define the visibility operation to 4314bf215546Sopenharmony_ci * bring the caches to a "state of rest", which none of the caches below that 4315bf215546Sopenharmony_ci * level dirty. 4316bf215546Sopenharmony_ci * 4317bf215546Sopenharmony_ci * For GFX8 and earlier this would be VRAM/GTT with none of the caches dirty. 4318bf215546Sopenharmony_ci * 4319bf215546Sopenharmony_ci * For GFX9+ we can define the state at rest to be L2 instead of VRAM for all 4320bf215546Sopenharmony_ci * buffers and for images marked as coherent, and VRAM/GTT for non-coherent 4321bf215546Sopenharmony_ci * images. However, given the existence of memory barriers which do not specify 4322bf215546Sopenharmony_ci * the image/buffer it often devolves to just VRAM/GTT anyway. 4323bf215546Sopenharmony_ci * 4324bf215546Sopenharmony_ci * To help reducing the invalidations for GPUs that have L2 coherency between the 4325bf215546Sopenharmony_ci * RB and the shader caches, we always invalidate L2 on the src side, as we can 4326bf215546Sopenharmony_ci * use our knowledge of past usage to optimize flushes away. 4327bf215546Sopenharmony_ci */ 4328bf215546Sopenharmony_ci 4329bf215546Sopenharmony_cienum radv_cmd_flush_bits 4330bf215546Sopenharmony_ciradv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags, 4331bf215546Sopenharmony_ci const struct radv_image *image) 4332bf215546Sopenharmony_ci{ 4333bf215546Sopenharmony_ci bool has_CB_meta = true, has_DB_meta = true; 4334bf215546Sopenharmony_ci bool image_is_coherent = image ? 
image->l2_coherent : false; 4335bf215546Sopenharmony_ci enum radv_cmd_flush_bits flush_bits = 0; 4336bf215546Sopenharmony_ci 4337bf215546Sopenharmony_ci if (image) { 4338bf215546Sopenharmony_ci if (!radv_image_has_CB_metadata(image)) 4339bf215546Sopenharmony_ci has_CB_meta = false; 4340bf215546Sopenharmony_ci if (!radv_image_has_htile(image)) 4341bf215546Sopenharmony_ci has_DB_meta = false; 4342bf215546Sopenharmony_ci } 4343bf215546Sopenharmony_ci 4344bf215546Sopenharmony_ci u_foreach_bit64(b, src_flags) 4345bf215546Sopenharmony_ci { 4346bf215546Sopenharmony_ci switch ((VkAccessFlags2)(1 << b)) { 4347bf215546Sopenharmony_ci case VK_ACCESS_2_SHADER_WRITE_BIT: 4348bf215546Sopenharmony_ci case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT: 4349bf215546Sopenharmony_ci /* since the STORAGE bit isn't set we know that this is a meta operation. 4350bf215546Sopenharmony_ci * on the dst flush side we skip CB/DB flushes without the STORAGE bit, so 4351bf215546Sopenharmony_ci * set it here. */ 4352bf215546Sopenharmony_ci if (image && !(image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)) { 4353bf215546Sopenharmony_ci if (vk_format_is_depth_or_stencil(image->vk.format)) { 4354bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; 4355bf215546Sopenharmony_ci } else { 4356bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; 4357bf215546Sopenharmony_ci } 4358bf215546Sopenharmony_ci } 4359bf215546Sopenharmony_ci 4360bf215546Sopenharmony_ci if (!image_is_coherent) 4361bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2; 4362bf215546Sopenharmony_ci break; 4363bf215546Sopenharmony_ci case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR: 4364bf215546Sopenharmony_ci case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT: 4365bf215546Sopenharmony_ci case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: 4366bf215546Sopenharmony_ci if (!image_is_coherent) 4367bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_WB_L2; 4368bf215546Sopenharmony_ci break; 
4369bf215546Sopenharmony_ci case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT: 4370bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; 4371bf215546Sopenharmony_ci if (has_CB_meta) 4372bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4373bf215546Sopenharmony_ci break; 4374bf215546Sopenharmony_ci case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: 4375bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; 4376bf215546Sopenharmony_ci if (has_DB_meta) 4377bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 4378bf215546Sopenharmony_ci break; 4379bf215546Sopenharmony_ci case VK_ACCESS_2_TRANSFER_WRITE_BIT: 4380bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; 4381bf215546Sopenharmony_ci 4382bf215546Sopenharmony_ci if (!image_is_coherent) 4383bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2; 4384bf215546Sopenharmony_ci if (has_CB_meta) 4385bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4386bf215546Sopenharmony_ci if (has_DB_meta) 4387bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 4388bf215546Sopenharmony_ci break; 4389bf215546Sopenharmony_ci case VK_ACCESS_2_MEMORY_WRITE_BIT: 4390bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB; 4391bf215546Sopenharmony_ci 4392bf215546Sopenharmony_ci if (!image_is_coherent) 4393bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2; 4394bf215546Sopenharmony_ci if (has_CB_meta) 4395bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4396bf215546Sopenharmony_ci if (has_DB_meta) 4397bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 4398bf215546Sopenharmony_ci break; 4399bf215546Sopenharmony_ci default: 4400bf215546Sopenharmony_ci break; 4401bf215546Sopenharmony_ci } 4402bf215546Sopenharmony_ci } 4403bf215546Sopenharmony_ci return flush_bits; 
4404bf215546Sopenharmony_ci} 4405bf215546Sopenharmony_ci 4406bf215546Sopenharmony_cienum radv_cmd_flush_bits 4407bf215546Sopenharmony_ciradv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags, 4408bf215546Sopenharmony_ci const struct radv_image *image) 4409bf215546Sopenharmony_ci{ 4410bf215546Sopenharmony_ci bool has_CB_meta = true, has_DB_meta = true; 4411bf215546Sopenharmony_ci enum radv_cmd_flush_bits flush_bits = 0; 4412bf215546Sopenharmony_ci bool flush_CB = true, flush_DB = true; 4413bf215546Sopenharmony_ci bool image_is_coherent = image ? image->l2_coherent : false; 4414bf215546Sopenharmony_ci 4415bf215546Sopenharmony_ci if (image) { 4416bf215546Sopenharmony_ci if (!(image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)) { 4417bf215546Sopenharmony_ci flush_CB = false; 4418bf215546Sopenharmony_ci flush_DB = false; 4419bf215546Sopenharmony_ci } 4420bf215546Sopenharmony_ci 4421bf215546Sopenharmony_ci if (!radv_image_has_CB_metadata(image)) 4422bf215546Sopenharmony_ci has_CB_meta = false; 4423bf215546Sopenharmony_ci if (!radv_image_has_htile(image)) 4424bf215546Sopenharmony_ci has_DB_meta = false; 4425bf215546Sopenharmony_ci } 4426bf215546Sopenharmony_ci 4427bf215546Sopenharmony_ci /* All the L2 invalidations below are not the CB/DB. So if there are no incoherent images 4428bf215546Sopenharmony_ci * in the L2 cache in CB/DB mode then they are already usable from all the other L2 clients. 
*/ 4429bf215546Sopenharmony_ci image_is_coherent |= 4430bf215546Sopenharmony_ci can_skip_buffer_l2_flushes(cmd_buffer->device) && !cmd_buffer->state.rb_noncoherent_dirty; 4431bf215546Sopenharmony_ci 4432bf215546Sopenharmony_ci u_foreach_bit64(b, dst_flags) 4433bf215546Sopenharmony_ci { 4434bf215546Sopenharmony_ci switch ((VkAccessFlags2)(1 << b)) { 4435bf215546Sopenharmony_ci case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT: 4436bf215546Sopenharmony_ci /* SMEM loads are used to read compute dispatch size in shaders */ 4437bf215546Sopenharmony_ci if (!cmd_buffer->device->load_grid_size_from_user_sgpr) 4438bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_SCACHE; 4439bf215546Sopenharmony_ci 4440bf215546Sopenharmony_ci /* Ensure the DGC meta shader can read the commands. */ 4441bf215546Sopenharmony_ci if (cmd_buffer->device->uses_device_generated_commands) { 4442bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE; 4443bf215546Sopenharmony_ci 4444bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9) 4445bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2; 4446bf215546Sopenharmony_ci } 4447bf215546Sopenharmony_ci 4448bf215546Sopenharmony_ci break; 4449bf215546Sopenharmony_ci case VK_ACCESS_2_INDEX_READ_BIT: 4450bf215546Sopenharmony_ci case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT: 4451bf215546Sopenharmony_ci break; 4452bf215546Sopenharmony_ci case VK_ACCESS_2_UNIFORM_READ_BIT: 4453bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE; 4454bf215546Sopenharmony_ci break; 4455bf215546Sopenharmony_ci case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT: 4456bf215546Sopenharmony_ci case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT: 4457bf215546Sopenharmony_ci case VK_ACCESS_2_TRANSFER_READ_BIT: 4458bf215546Sopenharmony_ci case VK_ACCESS_2_TRANSFER_WRITE_BIT: 4459bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_VCACHE; 4460bf215546Sopenharmony_ci 
4461bf215546Sopenharmony_ci if (has_CB_meta || has_DB_meta) 4462bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA; 4463bf215546Sopenharmony_ci if (!image_is_coherent) 4464bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2; 4465bf215546Sopenharmony_ci break; 4466bf215546Sopenharmony_ci case VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR: 4467bf215546Sopenharmony_ci case VK_ACCESS_2_SHADER_READ_BIT: 4468bf215546Sopenharmony_ci case VK_ACCESS_2_SHADER_STORAGE_READ_BIT: 4469bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_VCACHE; 4470bf215546Sopenharmony_ci /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to 4471bf215546Sopenharmony_ci * invalidate the scalar cache. */ 4472bf215546Sopenharmony_ci if (!cmd_buffer->device->physical_device->use_llvm && !image) 4473bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_SCACHE; 4474bf215546Sopenharmony_ci 4475bf215546Sopenharmony_ci if (has_CB_meta || has_DB_meta) 4476bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA; 4477bf215546Sopenharmony_ci if (!image_is_coherent) 4478bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2; 4479bf215546Sopenharmony_ci break; 4480bf215546Sopenharmony_ci case VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR: 4481bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_VCACHE; 4482bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9) 4483bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2; 4484bf215546Sopenharmony_ci break; 4485bf215546Sopenharmony_ci case VK_ACCESS_2_SHADER_WRITE_BIT: 4486bf215546Sopenharmony_ci case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT: 4487bf215546Sopenharmony_ci case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR: 4488bf215546Sopenharmony_ci break; 4489bf215546Sopenharmony_ci case VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT: 4490bf215546Sopenharmony_ci case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT: 4491bf215546Sopenharmony_ci if (flush_CB) 4492bf215546Sopenharmony_ci 
flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; 4493bf215546Sopenharmony_ci if (has_CB_meta) 4494bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4495bf215546Sopenharmony_ci break; 4496bf215546Sopenharmony_ci case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT: 4497bf215546Sopenharmony_ci case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT: 4498bf215546Sopenharmony_ci if (flush_DB) 4499bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; 4500bf215546Sopenharmony_ci if (has_DB_meta) 4501bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 4502bf215546Sopenharmony_ci break; 4503bf215546Sopenharmony_ci case VK_ACCESS_2_MEMORY_READ_BIT: 4504bf215546Sopenharmony_ci case VK_ACCESS_2_MEMORY_WRITE_BIT: 4505bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE; 4506bf215546Sopenharmony_ci if (!image_is_coherent) 4507bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_INV_L2; 4508bf215546Sopenharmony_ci if (flush_CB) 4509bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB; 4510bf215546Sopenharmony_ci if (has_CB_meta) 4511bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META; 4512bf215546Sopenharmony_ci if (flush_DB) 4513bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB; 4514bf215546Sopenharmony_ci if (has_DB_meta) 4515bf215546Sopenharmony_ci flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 4516bf215546Sopenharmony_ci break; 4517bf215546Sopenharmony_ci default: 4518bf215546Sopenharmony_ci break; 4519bf215546Sopenharmony_ci } 4520bf215546Sopenharmony_ci } 4521bf215546Sopenharmony_ci return flush_bits; 4522bf215546Sopenharmony_ci} 4523bf215546Sopenharmony_ci 4524bf215546Sopenharmony_civoid 4525bf215546Sopenharmony_ciradv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer, 4526bf215546Sopenharmony_ci const struct radv_subpass_barrier *barrier) 4527bf215546Sopenharmony_ci{ 4528bf215546Sopenharmony_ci struct 
radv_render_pass *pass = cmd_buffer->state.pass; 4529bf215546Sopenharmony_ci 4530bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->attachment_count; i++) { 4531bf215546Sopenharmony_ci struct radv_image_view *iview = cmd_buffer->state.attachments[i].iview; 4532bf215546Sopenharmony_ci 4533bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= 4534bf215546Sopenharmony_ci radv_src_access_flush(cmd_buffer, barrier->src_access_mask, iview->image); 4535bf215546Sopenharmony_ci } 4536bf215546Sopenharmony_ci 4537bf215546Sopenharmony_ci radv_stage_flush(cmd_buffer, barrier->src_stage_mask); 4538bf215546Sopenharmony_ci 4539bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->attachment_count; i++) { 4540bf215546Sopenharmony_ci struct radv_image_view *iview = cmd_buffer->state.attachments[i].iview; 4541bf215546Sopenharmony_ci 4542bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= 4543bf215546Sopenharmony_ci radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, iview->image); 4544bf215546Sopenharmony_ci } 4545bf215546Sopenharmony_ci 4546bf215546Sopenharmony_ci radv_ace_internal_barrier(cmd_buffer, barrier->src_stage_mask, barrier->dst_stage_mask); 4547bf215546Sopenharmony_ci} 4548bf215546Sopenharmony_ci 4549bf215546Sopenharmony_ciuint32_t 4550bf215546Sopenharmony_ciradv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer) 4551bf215546Sopenharmony_ci{ 4552bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 4553bf215546Sopenharmony_ci uint32_t subpass_id = state->subpass - state->pass->subpasses; 4554bf215546Sopenharmony_ci 4555bf215546Sopenharmony_ci /* The id of this subpass shouldn't exceed the number of subpasses in 4556bf215546Sopenharmony_ci * this render pass minus 1. 
4557bf215546Sopenharmony_ci */ 4558bf215546Sopenharmony_ci assert(subpass_id < state->pass->subpass_count); 4559bf215546Sopenharmony_ci return subpass_id; 4560bf215546Sopenharmony_ci} 4561bf215546Sopenharmony_ci 4562bf215546Sopenharmony_cistatic struct radv_sample_locations_state * 4563bf215546Sopenharmony_ciradv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer, uint32_t att_idx, 4564bf215546Sopenharmony_ci bool begin_subpass) 4565bf215546Sopenharmony_ci{ 4566bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 4567bf215546Sopenharmony_ci uint32_t subpass_id = radv_get_subpass_id(cmd_buffer); 4568bf215546Sopenharmony_ci struct radv_image_view *view = state->attachments[att_idx].iview; 4569bf215546Sopenharmony_ci 4570bf215546Sopenharmony_ci if (view->image->info.samples == 1) 4571bf215546Sopenharmony_ci return NULL; 4572bf215546Sopenharmony_ci 4573bf215546Sopenharmony_ci if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) { 4574bf215546Sopenharmony_ci /* Return the initial sample locations if this is the initial 4575bf215546Sopenharmony_ci * layout transition of the given subpass attachemnt. 4576bf215546Sopenharmony_ci */ 4577bf215546Sopenharmony_ci if (state->attachments[att_idx].sample_location.count > 0) 4578bf215546Sopenharmony_ci return &state->attachments[att_idx].sample_location; 4579bf215546Sopenharmony_ci } else { 4580bf215546Sopenharmony_ci /* Otherwise return the subpass sample locations if defined. */ 4581bf215546Sopenharmony_ci if (state->subpass_sample_locs) { 4582bf215546Sopenharmony_ci /* Because the driver sets the current subpass before 4583bf215546Sopenharmony_ci * initial layout transitions, we should use the sample 4584bf215546Sopenharmony_ci * locations from the previous subpass to avoid an 4585bf215546Sopenharmony_ci * off-by-one problem. Otherwise, use the sample 4586bf215546Sopenharmony_ci * locations for the current subpass for final layout 4587bf215546Sopenharmony_ci * transitions. 
4588bf215546Sopenharmony_ci */ 4589bf215546Sopenharmony_ci if (begin_subpass) 4590bf215546Sopenharmony_ci subpass_id--; 4591bf215546Sopenharmony_ci 4592bf215546Sopenharmony_ci for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) { 4593bf215546Sopenharmony_ci if (state->subpass_sample_locs[i].subpass_idx == subpass_id) 4594bf215546Sopenharmony_ci return &state->subpass_sample_locs[i].sample_location; 4595bf215546Sopenharmony_ci } 4596bf215546Sopenharmony_ci } 4597bf215546Sopenharmony_ci } 4598bf215546Sopenharmony_ci 4599bf215546Sopenharmony_ci return NULL; 4600bf215546Sopenharmony_ci} 4601bf215546Sopenharmony_ci 4602bf215546Sopenharmony_cistatic void 4603bf215546Sopenharmony_ciradv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer, 4604bf215546Sopenharmony_ci struct radv_subpass_attachment att, bool begin_subpass) 4605bf215546Sopenharmony_ci{ 4606bf215546Sopenharmony_ci unsigned idx = att.attachment; 4607bf215546Sopenharmony_ci struct radv_image_view *view = cmd_buffer->state.attachments[idx].iview; 4608bf215546Sopenharmony_ci struct radv_sample_locations_state *sample_locs; 4609bf215546Sopenharmony_ci VkImageSubresourceRange range; 4610bf215546Sopenharmony_ci range.aspectMask = view->vk.aspects; 4611bf215546Sopenharmony_ci range.baseMipLevel = view->vk.base_mip_level; 4612bf215546Sopenharmony_ci range.levelCount = 1; 4613bf215546Sopenharmony_ci range.baseArrayLayer = view->vk.base_array_layer; 4614bf215546Sopenharmony_ci range.layerCount = cmd_buffer->state.framebuffer->layers; 4615bf215546Sopenharmony_ci 4616bf215546Sopenharmony_ci if (cmd_buffer->state.subpass->view_mask) { 4617bf215546Sopenharmony_ci /* If the current subpass uses multiview, the driver might have 4618bf215546Sopenharmony_ci * performed a fast color/depth clear to the whole image 4619bf215546Sopenharmony_ci * (including all layers). 
To make sure the driver will 4620bf215546Sopenharmony_ci * decompress the image correctly (if needed), we have to 4621bf215546Sopenharmony_ci * account for the "real" number of layers. If the view mask is 4622bf215546Sopenharmony_ci * sparse, this will decompress more layers than needed. 4623bf215546Sopenharmony_ci */ 4624bf215546Sopenharmony_ci range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask); 4625bf215546Sopenharmony_ci } 4626bf215546Sopenharmony_ci 4627bf215546Sopenharmony_ci /* Get the subpass sample locations for the given attachment, if NULL 4628bf215546Sopenharmony_ci * is returned the driver will use the default HW locations. 4629bf215546Sopenharmony_ci */ 4630bf215546Sopenharmony_ci sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx, begin_subpass); 4631bf215546Sopenharmony_ci 4632bf215546Sopenharmony_ci /* Determine if the subpass uses separate depth/stencil layouts. */ 4633bf215546Sopenharmony_ci bool uses_separate_depth_stencil_layouts = false; 4634bf215546Sopenharmony_ci if ((cmd_buffer->state.attachments[idx].current_layout != 4635bf215546Sopenharmony_ci cmd_buffer->state.attachments[idx].current_stencil_layout) || 4636bf215546Sopenharmony_ci (att.layout != att.stencil_layout)) { 4637bf215546Sopenharmony_ci uses_separate_depth_stencil_layouts = true; 4638bf215546Sopenharmony_ci } 4639bf215546Sopenharmony_ci 4640bf215546Sopenharmony_ci /* For separate layouts, perform depth and stencil transitions 4641bf215546Sopenharmony_ci * separately. 4642bf215546Sopenharmony_ci */ 4643bf215546Sopenharmony_ci if (uses_separate_depth_stencil_layouts && 4644bf215546Sopenharmony_ci (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { 4645bf215546Sopenharmony_ci /* Depth-only transitions. 
*/ 4646bf215546Sopenharmony_ci range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; 4647bf215546Sopenharmony_ci radv_handle_image_transition(cmd_buffer, view->image, 4648bf215546Sopenharmony_ci cmd_buffer->state.attachments[idx].current_layout, 4649bf215546Sopenharmony_ci cmd_buffer->state.attachments[idx].current_in_render_loop, 4650bf215546Sopenharmony_ci att.layout, att.in_render_loop, 0, 0, &range, sample_locs); 4651bf215546Sopenharmony_ci 4652bf215546Sopenharmony_ci /* Stencil-only transitions. */ 4653bf215546Sopenharmony_ci range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT; 4654bf215546Sopenharmony_ci radv_handle_image_transition( 4655bf215546Sopenharmony_ci cmd_buffer, view->image, cmd_buffer->state.attachments[idx].current_stencil_layout, 4656bf215546Sopenharmony_ci cmd_buffer->state.attachments[idx].current_in_render_loop, att.stencil_layout, 4657bf215546Sopenharmony_ci att.in_render_loop, 0, 0, &range, sample_locs); 4658bf215546Sopenharmony_ci } else { 4659bf215546Sopenharmony_ci radv_handle_image_transition(cmd_buffer, view->image, 4660bf215546Sopenharmony_ci cmd_buffer->state.attachments[idx].current_layout, 4661bf215546Sopenharmony_ci cmd_buffer->state.attachments[idx].current_in_render_loop, 4662bf215546Sopenharmony_ci att.layout, att.in_render_loop, 0, 0, &range, sample_locs); 4663bf215546Sopenharmony_ci } 4664bf215546Sopenharmony_ci 4665bf215546Sopenharmony_ci cmd_buffer->state.attachments[idx].current_layout = att.layout; 4666bf215546Sopenharmony_ci cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout; 4667bf215546Sopenharmony_ci cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop; 4668bf215546Sopenharmony_ci} 4669bf215546Sopenharmony_ci 4670bf215546Sopenharmony_civoid 4671bf215546Sopenharmony_ciradv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass *subpass) 4672bf215546Sopenharmony_ci{ 4673bf215546Sopenharmony_ci cmd_buffer->state.subpass = subpass; 
4674bf215546Sopenharmony_ci 4675bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; 4676bf215546Sopenharmony_ci} 4677bf215546Sopenharmony_ci 4678bf215546Sopenharmony_cistatic VkResult 4679bf215546Sopenharmony_ciradv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer, 4680bf215546Sopenharmony_ci struct radv_render_pass *pass, 4681bf215546Sopenharmony_ci const VkRenderPassBeginInfo *info) 4682bf215546Sopenharmony_ci{ 4683bf215546Sopenharmony_ci const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs = 4684bf215546Sopenharmony_ci vk_find_struct_const(info->pNext, RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT); 4685bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 4686bf215546Sopenharmony_ci 4687bf215546Sopenharmony_ci if (!sample_locs) { 4688bf215546Sopenharmony_ci state->subpass_sample_locs = NULL; 4689bf215546Sopenharmony_ci return VK_SUCCESS; 4690bf215546Sopenharmony_ci } 4691bf215546Sopenharmony_ci 4692bf215546Sopenharmony_ci for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) { 4693bf215546Sopenharmony_ci const VkAttachmentSampleLocationsEXT *att_sample_locs = 4694bf215546Sopenharmony_ci &sample_locs->pAttachmentInitialSampleLocations[i]; 4695bf215546Sopenharmony_ci uint32_t att_idx = att_sample_locs->attachmentIndex; 4696bf215546Sopenharmony_ci struct radv_image *image = cmd_buffer->state.attachments[att_idx].iview->image; 4697bf215546Sopenharmony_ci 4698bf215546Sopenharmony_ci assert(vk_format_is_depth_or_stencil(image->vk.format)); 4699bf215546Sopenharmony_ci 4700bf215546Sopenharmony_ci /* From the Vulkan spec 1.1.108: 4701bf215546Sopenharmony_ci * 4702bf215546Sopenharmony_ci * "If the image referenced by the framebuffer attachment at 4703bf215546Sopenharmony_ci * index attachmentIndex was not created with 4704bf215546Sopenharmony_ci * VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT 4705bf215546Sopenharmony_ci * then the values specified in 
sampleLocationsInfo are 4706bf215546Sopenharmony_ci * ignored." 4707bf215546Sopenharmony_ci */ 4708bf215546Sopenharmony_ci if (!(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT)) 4709bf215546Sopenharmony_ci continue; 4710bf215546Sopenharmony_ci 4711bf215546Sopenharmony_ci const VkSampleLocationsInfoEXT *sample_locs_info = &att_sample_locs->sampleLocationsInfo; 4712bf215546Sopenharmony_ci 4713bf215546Sopenharmony_ci state->attachments[att_idx].sample_location.per_pixel = 4714bf215546Sopenharmony_ci sample_locs_info->sampleLocationsPerPixel; 4715bf215546Sopenharmony_ci state->attachments[att_idx].sample_location.grid_size = 4716bf215546Sopenharmony_ci sample_locs_info->sampleLocationGridSize; 4717bf215546Sopenharmony_ci state->attachments[att_idx].sample_location.count = sample_locs_info->sampleLocationsCount; 4718bf215546Sopenharmony_ci typed_memcpy(&state->attachments[att_idx].sample_location.locations[0], 4719bf215546Sopenharmony_ci sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount); 4720bf215546Sopenharmony_ci } 4721bf215546Sopenharmony_ci 4722bf215546Sopenharmony_ci state->subpass_sample_locs = 4723bf215546Sopenharmony_ci vk_alloc(&cmd_buffer->pool->vk.alloc, 4724bf215546Sopenharmony_ci sample_locs->postSubpassSampleLocationsCount * sizeof(state->subpass_sample_locs[0]), 4725bf215546Sopenharmony_ci 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 4726bf215546Sopenharmony_ci if (state->subpass_sample_locs == NULL) { 4727bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 4728bf215546Sopenharmony_ci return cmd_buffer->record_result; 4729bf215546Sopenharmony_ci } 4730bf215546Sopenharmony_ci 4731bf215546Sopenharmony_ci state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount; 4732bf215546Sopenharmony_ci 4733bf215546Sopenharmony_ci for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) { 4734bf215546Sopenharmony_ci const VkSubpassSampleLocationsEXT 
*subpass_sample_locs_info = 4735bf215546Sopenharmony_ci &sample_locs->pPostSubpassSampleLocations[i]; 4736bf215546Sopenharmony_ci const VkSampleLocationsInfoEXT *sample_locs_info = 4737bf215546Sopenharmony_ci &subpass_sample_locs_info->sampleLocationsInfo; 4738bf215546Sopenharmony_ci 4739bf215546Sopenharmony_ci state->subpass_sample_locs[i].subpass_idx = subpass_sample_locs_info->subpassIndex; 4740bf215546Sopenharmony_ci state->subpass_sample_locs[i].sample_location.per_pixel = 4741bf215546Sopenharmony_ci sample_locs_info->sampleLocationsPerPixel; 4742bf215546Sopenharmony_ci state->subpass_sample_locs[i].sample_location.grid_size = 4743bf215546Sopenharmony_ci sample_locs_info->sampleLocationGridSize; 4744bf215546Sopenharmony_ci state->subpass_sample_locs[i].sample_location.count = sample_locs_info->sampleLocationsCount; 4745bf215546Sopenharmony_ci typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0], 4746bf215546Sopenharmony_ci sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount); 4747bf215546Sopenharmony_ci } 4748bf215546Sopenharmony_ci 4749bf215546Sopenharmony_ci return VK_SUCCESS; 4750bf215546Sopenharmony_ci} 4751bf215546Sopenharmony_ci 4752bf215546Sopenharmony_cistatic VkResult 4753bf215546Sopenharmony_ciradv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer, struct radv_render_pass *pass, 4754bf215546Sopenharmony_ci const VkRenderPassBeginInfo *info) 4755bf215546Sopenharmony_ci{ 4756bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 4757bf215546Sopenharmony_ci const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL; 4758bf215546Sopenharmony_ci 4759bf215546Sopenharmony_ci if (info) { 4760bf215546Sopenharmony_ci attachment_info = vk_find_struct_const(info->pNext, RENDER_PASS_ATTACHMENT_BEGIN_INFO); 4761bf215546Sopenharmony_ci } 4762bf215546Sopenharmony_ci 4763bf215546Sopenharmony_ci if (pass->attachment_count == 0) { 4764bf215546Sopenharmony_ci state->attachments = NULL; 
4765bf215546Sopenharmony_ci return VK_SUCCESS; 4766bf215546Sopenharmony_ci } 4767bf215546Sopenharmony_ci 4768bf215546Sopenharmony_ci state->attachments = 4769bf215546Sopenharmony_ci vk_alloc(&cmd_buffer->pool->vk.alloc, pass->attachment_count * sizeof(state->attachments[0]), 4770bf215546Sopenharmony_ci 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 4771bf215546Sopenharmony_ci if (state->attachments == NULL) { 4772bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 4773bf215546Sopenharmony_ci return cmd_buffer->record_result; 4774bf215546Sopenharmony_ci } 4775bf215546Sopenharmony_ci 4776bf215546Sopenharmony_ci for (uint32_t i = 0; i < pass->attachment_count; ++i) { 4777bf215546Sopenharmony_ci struct radv_render_pass_attachment *att = &pass->attachments[i]; 4778bf215546Sopenharmony_ci VkImageAspectFlags att_aspects = vk_format_aspects(att->format); 4779bf215546Sopenharmony_ci VkImageAspectFlags clear_aspects = 0; 4780bf215546Sopenharmony_ci 4781bf215546Sopenharmony_ci if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) { 4782bf215546Sopenharmony_ci /* color attachment */ 4783bf215546Sopenharmony_ci if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { 4784bf215546Sopenharmony_ci clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT; 4785bf215546Sopenharmony_ci } 4786bf215546Sopenharmony_ci } else { 4787bf215546Sopenharmony_ci /* depthstencil attachment */ 4788bf215546Sopenharmony_ci if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && 4789bf215546Sopenharmony_ci att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) { 4790bf215546Sopenharmony_ci clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT; 4791bf215546Sopenharmony_ci if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && 4792bf215546Sopenharmony_ci att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE) 4793bf215546Sopenharmony_ci clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; 4794bf215546Sopenharmony_ci } 4795bf215546Sopenharmony_ci if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && 4796bf215546Sopenharmony_ci att->stencil_load_op == 
VK_ATTACHMENT_LOAD_OP_CLEAR) { 4797bf215546Sopenharmony_ci clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT; 4798bf215546Sopenharmony_ci } 4799bf215546Sopenharmony_ci } 4800bf215546Sopenharmony_ci 4801bf215546Sopenharmony_ci state->attachments[i].pending_clear_aspects = clear_aspects; 4802bf215546Sopenharmony_ci state->attachments[i].cleared_views = 0; 4803bf215546Sopenharmony_ci if (clear_aspects && info) { 4804bf215546Sopenharmony_ci assert(info->clearValueCount > i); 4805bf215546Sopenharmony_ci state->attachments[i].clear_value = info->pClearValues[i]; 4806bf215546Sopenharmony_ci } 4807bf215546Sopenharmony_ci 4808bf215546Sopenharmony_ci state->attachments[i].current_layout = att->initial_layout; 4809bf215546Sopenharmony_ci state->attachments[i].current_in_render_loop = false; 4810bf215546Sopenharmony_ci state->attachments[i].current_stencil_layout = att->stencil_initial_layout; 4811bf215546Sopenharmony_ci state->attachments[i].sample_location.count = 0; 4812bf215546Sopenharmony_ci 4813bf215546Sopenharmony_ci struct radv_image_view *iview; 4814bf215546Sopenharmony_ci if (attachment_info && attachment_info->attachmentCount > i) { 4815bf215546Sopenharmony_ci iview = radv_image_view_from_handle(attachment_info->pAttachments[i]); 4816bf215546Sopenharmony_ci } else { 4817bf215546Sopenharmony_ci iview = radv_image_view_from_handle(state->framebuffer->attachments[i]); 4818bf215546Sopenharmony_ci } 4819bf215546Sopenharmony_ci 4820bf215546Sopenharmony_ci state->attachments[i].iview = iview; 4821bf215546Sopenharmony_ci if (iview->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) { 4822bf215546Sopenharmony_ci radv_initialise_ds_surface(cmd_buffer->device, &state->attachments[i].ds, iview); 4823bf215546Sopenharmony_ci } else { 4824bf215546Sopenharmony_ci radv_initialise_color_surface(cmd_buffer->device, &state->attachments[i].cb, iview); 4825bf215546Sopenharmony_ci } 4826bf215546Sopenharmony_ci } 4827bf215546Sopenharmony_ci 4828bf215546Sopenharmony_ci 
return VK_SUCCESS; 4829bf215546Sopenharmony_ci} 4830bf215546Sopenharmony_ci 4831bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 4832bf215546Sopenharmony_ciradv_AllocateCommandBuffers(VkDevice _device, const VkCommandBufferAllocateInfo *pAllocateInfo, 4833bf215546Sopenharmony_ci VkCommandBuffer *pCommandBuffers) 4834bf215546Sopenharmony_ci{ 4835bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 4836bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_pool, pool, pAllocateInfo->commandPool); 4837bf215546Sopenharmony_ci 4838bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 4839bf215546Sopenharmony_ci uint32_t i; 4840bf215546Sopenharmony_ci 4841bf215546Sopenharmony_ci for (i = 0; i < pAllocateInfo->commandBufferCount; i++) { 4842bf215546Sopenharmony_ci 4843bf215546Sopenharmony_ci if (!list_is_empty(&pool->free_cmd_buffers)) { 4844bf215546Sopenharmony_ci struct radv_cmd_buffer *cmd_buffer = 4845bf215546Sopenharmony_ci list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link); 4846bf215546Sopenharmony_ci 4847bf215546Sopenharmony_ci list_del(&cmd_buffer->pool_link); 4848bf215546Sopenharmony_ci list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers); 4849bf215546Sopenharmony_ci 4850bf215546Sopenharmony_ci result = radv_reset_cmd_buffer(cmd_buffer); 4851bf215546Sopenharmony_ci vk_command_buffer_finish(&cmd_buffer->vk); 4852bf215546Sopenharmony_ci VkResult init_result = 4853bf215546Sopenharmony_ci vk_command_buffer_init(&cmd_buffer->vk, &pool->vk, pAllocateInfo->level); 4854bf215546Sopenharmony_ci if (init_result != VK_SUCCESS) 4855bf215546Sopenharmony_ci result = init_result; 4856bf215546Sopenharmony_ci 4857bf215546Sopenharmony_ci pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer); 4858bf215546Sopenharmony_ci } else { 4859bf215546Sopenharmony_ci result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level, &pCommandBuffers[i]); 4860bf215546Sopenharmony_ci } 4861bf215546Sopenharmony_ci if (result != VK_SUCCESS) 
4862bf215546Sopenharmony_ci break; 4863bf215546Sopenharmony_ci } 4864bf215546Sopenharmony_ci 4865bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 4866bf215546Sopenharmony_ci radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, pCommandBuffers); 4867bf215546Sopenharmony_ci 4868bf215546Sopenharmony_ci /* From the Vulkan 1.0.66 spec: 4869bf215546Sopenharmony_ci * 4870bf215546Sopenharmony_ci * "vkAllocateCommandBuffers can be used to create multiple 4871bf215546Sopenharmony_ci * command buffers. If the creation of any of those command 4872bf215546Sopenharmony_ci * buffers fails, the implementation must destroy all 4873bf215546Sopenharmony_ci * successfully created command buffer objects from this 4874bf215546Sopenharmony_ci * command, set all entries of the pCommandBuffers array to 4875bf215546Sopenharmony_ci * NULL and return the error." 4876bf215546Sopenharmony_ci */ 4877bf215546Sopenharmony_ci memset(pCommandBuffers, 0, sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount); 4878bf215546Sopenharmony_ci } 4879bf215546Sopenharmony_ci 4880bf215546Sopenharmony_ci return result; 4881bf215546Sopenharmony_ci} 4882bf215546Sopenharmony_ci 4883bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 4884bf215546Sopenharmony_ciradv_FreeCommandBuffers(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount, 4885bf215546Sopenharmony_ci const VkCommandBuffer *pCommandBuffers) 4886bf215546Sopenharmony_ci{ 4887bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 4888bf215546Sopenharmony_ci 4889bf215546Sopenharmony_ci for (uint32_t i = 0; i < commandBufferCount; i++) { 4890bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]); 4891bf215546Sopenharmony_ci 4892bf215546Sopenharmony_ci if (!cmd_buffer) 4893bf215546Sopenharmony_ci continue; 4894bf215546Sopenharmony_ci assert(cmd_buffer->pool == pool); 4895bf215546Sopenharmony_ci 4896bf215546Sopenharmony_ci list_del(&cmd_buffer->pool_link); 
4897bf215546Sopenharmony_ci list_addtail(&cmd_buffer->pool_link, &pool->free_cmd_buffers); 4898bf215546Sopenharmony_ci } 4899bf215546Sopenharmony_ci} 4900bf215546Sopenharmony_ci 4901bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 4902bf215546Sopenharmony_ciradv_ResetCommandBuffer(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags) 4903bf215546Sopenharmony_ci{ 4904bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 4905bf215546Sopenharmony_ci return radv_reset_cmd_buffer(cmd_buffer); 4906bf215546Sopenharmony_ci} 4907bf215546Sopenharmony_ci 4908bf215546Sopenharmony_cistatic void 4909bf215546Sopenharmony_ciradv_inherit_dynamic_rendering(struct radv_cmd_buffer *cmd_buffer, 4910bf215546Sopenharmony_ci const VkCommandBufferInheritanceInfo *inherit_info, 4911bf215546Sopenharmony_ci const VkCommandBufferInheritanceRenderingInfo *dyn_info) 4912bf215546Sopenharmony_ci{ 4913bf215546Sopenharmony_ci const VkAttachmentSampleCountInfoAMD *sample_info = 4914bf215546Sopenharmony_ci vk_find_struct_const(inherit_info->pNext, ATTACHMENT_SAMPLE_COUNT_INFO_AMD); 4915bf215546Sopenharmony_ci VkResult result; 4916bf215546Sopenharmony_ci /* (normal + resolve) for color attachments and ds and a VRS attachment */ 4917bf215546Sopenharmony_ci VkAttachmentDescription2 att_desc[MAX_RTS * 2 + 3]; 4918bf215546Sopenharmony_ci VkAttachmentReference2 color_refs[MAX_RTS], ds_ref; 4919bf215546Sopenharmony_ci unsigned att_count = 0; 4920bf215546Sopenharmony_ci 4921bf215546Sopenharmony_ci VkSubpassDescription2 subpass = { 4922bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2, 4923bf215546Sopenharmony_ci .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 4924bf215546Sopenharmony_ci .viewMask = dyn_info->viewMask, 4925bf215546Sopenharmony_ci .colorAttachmentCount = dyn_info->colorAttachmentCount, 4926bf215546Sopenharmony_ci .pColorAttachments = color_refs, 4927bf215546Sopenharmony_ci }; 4928bf215546Sopenharmony_ci 
4929bf215546Sopenharmony_ci for (unsigned i = 0; i < dyn_info->colorAttachmentCount; ++i) { 4930bf215546Sopenharmony_ci if (dyn_info->pColorAttachmentFormats[i] == VK_FORMAT_UNDEFINED) { 4931bf215546Sopenharmony_ci color_refs[i] = (VkAttachmentReference2){ 4932bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 4933bf215546Sopenharmony_ci .attachment = VK_ATTACHMENT_UNUSED, 4934bf215546Sopenharmony_ci }; 4935bf215546Sopenharmony_ci continue; 4936bf215546Sopenharmony_ci } 4937bf215546Sopenharmony_ci 4938bf215546Sopenharmony_ci color_refs[i] = (VkAttachmentReference2){ 4939bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 4940bf215546Sopenharmony_ci .attachment = att_count, 4941bf215546Sopenharmony_ci .layout = VK_IMAGE_LAYOUT_GENERAL, /* Shouldn't be used */ 4942bf215546Sopenharmony_ci .aspectMask = 0, /* Shouldn't be used */ 4943bf215546Sopenharmony_ci }; 4944bf215546Sopenharmony_ci 4945bf215546Sopenharmony_ci VkAttachmentDescription2 *att = att_desc + att_count++; 4946bf215546Sopenharmony_ci memset(att, 0, sizeof(*att)); 4947bf215546Sopenharmony_ci att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; 4948bf215546Sopenharmony_ci att->format = dyn_info->pColorAttachmentFormats[i]; 4949bf215546Sopenharmony_ci att->samples = 4950bf215546Sopenharmony_ci sample_info ? 
sample_info->pColorAttachmentSamples[i] : dyn_info->rasterizationSamples; 4951bf215546Sopenharmony_ci att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 4952bf215546Sopenharmony_ci att->storeOp = VK_ATTACHMENT_STORE_OP_STORE; 4953bf215546Sopenharmony_ci att->initialLayout = VK_IMAGE_LAYOUT_GENERAL; 4954bf215546Sopenharmony_ci att->finalLayout = VK_IMAGE_LAYOUT_GENERAL; 4955bf215546Sopenharmony_ci } 4956bf215546Sopenharmony_ci 4957bf215546Sopenharmony_ci if (dyn_info->depthAttachmentFormat != VK_FORMAT_UNDEFINED || 4958bf215546Sopenharmony_ci dyn_info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) { 4959bf215546Sopenharmony_ci VkFormat fmt = dyn_info->depthAttachmentFormat != VK_FORMAT_UNDEFINED 4960bf215546Sopenharmony_ci ? dyn_info->depthAttachmentFormat 4961bf215546Sopenharmony_ci : dyn_info->stencilAttachmentFormat; 4962bf215546Sopenharmony_ci 4963bf215546Sopenharmony_ci ds_ref = (VkAttachmentReference2){ 4964bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 4965bf215546Sopenharmony_ci .attachment = att_count, 4966bf215546Sopenharmony_ci .layout = VK_IMAGE_LAYOUT_GENERAL, /* Shouldn't be used */ 4967bf215546Sopenharmony_ci .aspectMask = 0, /* Shouldn't be used */ 4968bf215546Sopenharmony_ci }; 4969bf215546Sopenharmony_ci subpass.pDepthStencilAttachment = &ds_ref; 4970bf215546Sopenharmony_ci 4971bf215546Sopenharmony_ci VkAttachmentDescription2 *att = att_desc + att_count++; 4972bf215546Sopenharmony_ci 4973bf215546Sopenharmony_ci memset(att, 0, sizeof(*att)); 4974bf215546Sopenharmony_ci att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; 4975bf215546Sopenharmony_ci att->format = fmt; 4976bf215546Sopenharmony_ci att->samples = 4977bf215546Sopenharmony_ci sample_info ? 
sample_info->depthStencilAttachmentSamples : dyn_info->rasterizationSamples; 4978bf215546Sopenharmony_ci att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 4979bf215546Sopenharmony_ci att->storeOp = VK_ATTACHMENT_STORE_OP_STORE; 4980bf215546Sopenharmony_ci att->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 4981bf215546Sopenharmony_ci att->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; 4982bf215546Sopenharmony_ci } 4983bf215546Sopenharmony_ci 4984bf215546Sopenharmony_ci VkRenderPassCreateInfo2 rp_create_info = { 4985bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2, 4986bf215546Sopenharmony_ci .attachmentCount = att_count, 4987bf215546Sopenharmony_ci .pAttachments = att_desc, 4988bf215546Sopenharmony_ci .subpassCount = 1, 4989bf215546Sopenharmony_ci .pSubpasses = &subpass, 4990bf215546Sopenharmony_ci }; 4991bf215546Sopenharmony_ci 4992bf215546Sopenharmony_ci VkRenderPass rp; 4993bf215546Sopenharmony_ci result = 4994bf215546Sopenharmony_ci radv_CreateRenderPass2(radv_device_to_handle(cmd_buffer->device), &rp_create_info, NULL, &rp); 4995bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 4996bf215546Sopenharmony_ci cmd_buffer->record_result = result; 4997bf215546Sopenharmony_ci return; 4998bf215546Sopenharmony_ci } 4999bf215546Sopenharmony_ci 5000bf215546Sopenharmony_ci cmd_buffer->state.pass = radv_render_pass_from_handle(rp); 5001bf215546Sopenharmony_ci cmd_buffer->state.own_render_pass = true; 5002bf215546Sopenharmony_ci} 5003bf215546Sopenharmony_ci 5004bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 5005bf215546Sopenharmony_ciradv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo) 5006bf215546Sopenharmony_ci{ 5007bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5008bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 5009bf215546Sopenharmony_ci 5010bf215546Sopenharmony_ci if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) { 5011bf215546Sopenharmony_ci /* 
If the command buffer has already been resetted with 5012bf215546Sopenharmony_ci * vkResetCommandBuffer, no need to do it again. 5013bf215546Sopenharmony_ci */ 5014bf215546Sopenharmony_ci result = radv_reset_cmd_buffer(cmd_buffer); 5015bf215546Sopenharmony_ci if (result != VK_SUCCESS) 5016bf215546Sopenharmony_ci return result; 5017bf215546Sopenharmony_ci } 5018bf215546Sopenharmony_ci 5019bf215546Sopenharmony_ci memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state)); 5020bf215546Sopenharmony_ci cmd_buffer->state.last_primitive_reset_en = -1; 5021bf215546Sopenharmony_ci cmd_buffer->state.last_index_type = -1; 5022bf215546Sopenharmony_ci cmd_buffer->state.last_num_instances = -1; 5023bf215546Sopenharmony_ci cmd_buffer->state.last_vertex_offset = -1; 5024bf215546Sopenharmony_ci cmd_buffer->state.last_first_instance = -1; 5025bf215546Sopenharmony_ci cmd_buffer->state.last_drawid = -1; 5026bf215546Sopenharmony_ci cmd_buffer->state.last_subpass_color_count = MAX_RTS; 5027bf215546Sopenharmony_ci cmd_buffer->state.predication_type = -1; 5028bf215546Sopenharmony_ci cmd_buffer->state.last_sx_ps_downconvert = -1; 5029bf215546Sopenharmony_ci cmd_buffer->state.last_sx_blend_opt_epsilon = -1; 5030bf215546Sopenharmony_ci cmd_buffer->state.last_sx_blend_opt_control = -1; 5031bf215546Sopenharmony_ci cmd_buffer->state.last_nggc_settings = -1; 5032bf215546Sopenharmony_ci cmd_buffer->state.last_nggc_settings_sgpr_idx = -1; 5033bf215546Sopenharmony_ci cmd_buffer->state.mesh_shading = false; 5034bf215546Sopenharmony_ci cmd_buffer->state.last_vrs_rates = -1; 5035bf215546Sopenharmony_ci cmd_buffer->state.last_vrs_rates_sgpr_idx = -1; 5036bf215546Sopenharmony_ci cmd_buffer->usage_flags = pBeginInfo->flags; 5037bf215546Sopenharmony_ci 5038bf215546Sopenharmony_ci if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY && 5039bf215546Sopenharmony_ci (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) { 5040bf215546Sopenharmony_ci struct radv_subpass *subpass = NULL; 
5041bf215546Sopenharmony_ci 5042bf215546Sopenharmony_ci assert(pBeginInfo->pInheritanceInfo); 5043bf215546Sopenharmony_ci 5044bf215546Sopenharmony_ci cmd_buffer->state.framebuffer = 5045bf215546Sopenharmony_ci vk_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer); 5046bf215546Sopenharmony_ci 5047bf215546Sopenharmony_ci if (pBeginInfo->pInheritanceInfo->renderPass) { 5048bf215546Sopenharmony_ci cmd_buffer->state.pass = 5049bf215546Sopenharmony_ci radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass); 5050bf215546Sopenharmony_ci assert(pBeginInfo->pInheritanceInfo->subpass < cmd_buffer->state.pass->subpass_count); 5051bf215546Sopenharmony_ci subpass = &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass]; 5052bf215546Sopenharmony_ci } else { 5053bf215546Sopenharmony_ci const VkCommandBufferInheritanceRenderingInfo *dyn_info = 5054bf215546Sopenharmony_ci vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext, 5055bf215546Sopenharmony_ci COMMAND_BUFFER_INHERITANCE_RENDERING_INFO); 5056bf215546Sopenharmony_ci if (dyn_info) { 5057bf215546Sopenharmony_ci radv_inherit_dynamic_rendering(cmd_buffer, pBeginInfo->pInheritanceInfo, dyn_info); 5058bf215546Sopenharmony_ci subpass = &cmd_buffer->state.pass->subpasses[0]; 5059bf215546Sopenharmony_ci } 5060bf215546Sopenharmony_ci } 5061bf215546Sopenharmony_ci 5062bf215546Sopenharmony_ci if (cmd_buffer->state.framebuffer) { 5063bf215546Sopenharmony_ci result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL); 5064bf215546Sopenharmony_ci if (result != VK_SUCCESS) 5065bf215546Sopenharmony_ci return result; 5066bf215546Sopenharmony_ci } 5067bf215546Sopenharmony_ci 5068bf215546Sopenharmony_ci cmd_buffer->state.inherited_pipeline_statistics = 5069bf215546Sopenharmony_ci pBeginInfo->pInheritanceInfo->pipelineStatistics; 5070bf215546Sopenharmony_ci 5071bf215546Sopenharmony_ci if (cmd_buffer->state.pass) { 5072bf215546Sopenharmony_ci cmd_buffer->state.subpass = 
subpass; 5073bf215546Sopenharmony_ci if (cmd_buffer->state.framebuffer) 5074bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER; 5075bf215546Sopenharmony_ci } 5076bf215546Sopenharmony_ci } 5077bf215546Sopenharmony_ci 5078bf215546Sopenharmony_ci if (unlikely(cmd_buffer->device->trace_bo)) 5079bf215546Sopenharmony_ci radv_cmd_buffer_trace_emit(cmd_buffer); 5080bf215546Sopenharmony_ci 5081bf215546Sopenharmony_ci radv_describe_begin_cmd_buffer(cmd_buffer); 5082bf215546Sopenharmony_ci 5083bf215546Sopenharmony_ci cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING; 5084bf215546Sopenharmony_ci 5085bf215546Sopenharmony_ci return result; 5086bf215546Sopenharmony_ci} 5087bf215546Sopenharmony_ci 5088bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 5089bf215546Sopenharmony_ciradv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding, 5090bf215546Sopenharmony_ci uint32_t bindingCount, const VkBuffer *pBuffers, 5091bf215546Sopenharmony_ci const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes, 5092bf215546Sopenharmony_ci const VkDeviceSize *pStrides) 5093bf215546Sopenharmony_ci{ 5094bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5095bf215546Sopenharmony_ci struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings; 5096bf215546Sopenharmony_ci const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input; 5097bf215546Sopenharmony_ci 5098bf215546Sopenharmony_ci /* We have to defer setting up vertex buffer since we need the buffer 5099bf215546Sopenharmony_ci * stride from the pipeline. 
*/ 5100bf215546Sopenharmony_ci 5101bf215546Sopenharmony_ci assert(firstBinding + bindingCount <= MAX_VBS); 5102bf215546Sopenharmony_ci enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level; 5103bf215546Sopenharmony_ci 5104bf215546Sopenharmony_ci if (firstBinding + bindingCount > cmd_buffer->used_vertex_bindings) 5105bf215546Sopenharmony_ci cmd_buffer->used_vertex_bindings = firstBinding + bindingCount; 5106bf215546Sopenharmony_ci 5107bf215546Sopenharmony_ci uint32_t misaligned_mask_invalid = 0; 5108bf215546Sopenharmony_ci 5109bf215546Sopenharmony_ci for (uint32_t i = 0; i < bindingCount; i++) { 5110bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]); 5111bf215546Sopenharmony_ci uint32_t idx = firstBinding + i; 5112bf215546Sopenharmony_ci VkDeviceSize size = pSizes ? pSizes[i] : 0; 5113bf215546Sopenharmony_ci /* if pStrides=NULL, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */ 5114bf215546Sopenharmony_ci VkDeviceSize stride = pStrides ? pStrides[i] : vb[idx].stride; 5115bf215546Sopenharmony_ci 5116bf215546Sopenharmony_ci if (!!cmd_buffer->vertex_binding_buffers[idx] != !!buffer || 5117bf215546Sopenharmony_ci (buffer && ((vb[idx].offset & 0x3) != (pOffsets[i] & 0x3) || 5118bf215546Sopenharmony_ci (vb[idx].stride & 0x3) != (stride & 0x3)))) { 5119bf215546Sopenharmony_ci misaligned_mask_invalid |= state->bindings_match_attrib ? 
BITFIELD_BIT(idx) : 0xffffffff; 5120bf215546Sopenharmony_ci } 5121bf215546Sopenharmony_ci 5122bf215546Sopenharmony_ci cmd_buffer->vertex_binding_buffers[idx] = buffer; 5123bf215546Sopenharmony_ci vb[idx].offset = pOffsets[i]; 5124bf215546Sopenharmony_ci vb[idx].size = size; 5125bf215546Sopenharmony_ci vb[idx].stride = stride; 5126bf215546Sopenharmony_ci 5127bf215546Sopenharmony_ci uint32_t bit = BITFIELD_BIT(idx); 5128bf215546Sopenharmony_ci if (buffer) { 5129bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->vertex_binding_buffers[idx]->bo); 5130bf215546Sopenharmony_ci cmd_buffer->state.vbo_bound_mask |= bit; 5131bf215546Sopenharmony_ci } else { 5132bf215546Sopenharmony_ci cmd_buffer->state.vbo_bound_mask &= ~bit; 5133bf215546Sopenharmony_ci } 5134bf215546Sopenharmony_ci } 5135bf215546Sopenharmony_ci 5136bf215546Sopenharmony_ci if ((chip == GFX6 || chip >= GFX10) && misaligned_mask_invalid) { 5137bf215546Sopenharmony_ci cmd_buffer->state.vbo_misaligned_mask_invalid = misaligned_mask_invalid; 5138bf215546Sopenharmony_ci cmd_buffer->state.vbo_misaligned_mask &= ~misaligned_mask_invalid; 5139bf215546Sopenharmony_ci } 5140bf215546Sopenharmony_ci 5141bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER | 5142bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT; 5143bf215546Sopenharmony_ci} 5144bf215546Sopenharmony_ci 5145bf215546Sopenharmony_cistatic uint32_t 5146bf215546Sopenharmony_civk_to_index_type(VkIndexType type) 5147bf215546Sopenharmony_ci{ 5148bf215546Sopenharmony_ci switch (type) { 5149bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT8_EXT: 5150bf215546Sopenharmony_ci return V_028A7C_VGT_INDEX_8; 5151bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT16: 5152bf215546Sopenharmony_ci return V_028A7C_VGT_INDEX_16; 5153bf215546Sopenharmony_ci case VK_INDEX_TYPE_UINT32: 5154bf215546Sopenharmony_ci return V_028A7C_VGT_INDEX_32; 5155bf215546Sopenharmony_ci default: 5156bf215546Sopenharmony_ci 
unreachable("invalid index type"); 5157bf215546Sopenharmony_ci } 5158bf215546Sopenharmony_ci} 5159bf215546Sopenharmony_ci 5160bf215546Sopenharmony_ciuint32_t 5161bf215546Sopenharmony_ciradv_get_vgt_index_size(uint32_t type) 5162bf215546Sopenharmony_ci{ 5163bf215546Sopenharmony_ci uint32_t index_type = G_028A7C_INDEX_TYPE(type); 5164bf215546Sopenharmony_ci switch (index_type) { 5165bf215546Sopenharmony_ci case V_028A7C_VGT_INDEX_8: 5166bf215546Sopenharmony_ci return 1; 5167bf215546Sopenharmony_ci case V_028A7C_VGT_INDEX_16: 5168bf215546Sopenharmony_ci return 2; 5169bf215546Sopenharmony_ci case V_028A7C_VGT_INDEX_32: 5170bf215546Sopenharmony_ci return 4; 5171bf215546Sopenharmony_ci default: 5172bf215546Sopenharmony_ci unreachable("invalid index type"); 5173bf215546Sopenharmony_ci } 5174bf215546Sopenharmony_ci} 5175bf215546Sopenharmony_ci 5176bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 5177bf215546Sopenharmony_ciradv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset, 5178bf215546Sopenharmony_ci VkIndexType indexType) 5179bf215546Sopenharmony_ci{ 5180bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5181bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer); 5182bf215546Sopenharmony_ci 5183bf215546Sopenharmony_ci cmd_buffer->state.index_buffer = index_buffer; 5184bf215546Sopenharmony_ci cmd_buffer->state.index_offset = offset; 5185bf215546Sopenharmony_ci cmd_buffer->state.index_type = vk_to_index_type(indexType); 5186bf215546Sopenharmony_ci cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo); 5187bf215546Sopenharmony_ci cmd_buffer->state.index_va += index_buffer->offset + offset; 5188bf215546Sopenharmony_ci 5189bf215546Sopenharmony_ci int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType)); 5190bf215546Sopenharmony_ci cmd_buffer->state.max_index_count = 5191bf215546Sopenharmony_ci (vk_buffer_range(&index_buffer->vk, offset, VK_WHOLE_SIZE)) / 
index_size; 5192bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; 5193bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo); 5194bf215546Sopenharmony_ci} 5195bf215546Sopenharmony_ci 5196bf215546Sopenharmony_cistatic void 5197bf215546Sopenharmony_ciradv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point, 5198bf215546Sopenharmony_ci struct radv_descriptor_set *set, unsigned idx) 5199bf215546Sopenharmony_ci{ 5200bf215546Sopenharmony_ci struct radeon_winsys *ws = cmd_buffer->device->ws; 5201bf215546Sopenharmony_ci 5202bf215546Sopenharmony_ci radv_set_descriptor_set(cmd_buffer, bind_point, set, idx); 5203bf215546Sopenharmony_ci 5204bf215546Sopenharmony_ci assert(set); 5205bf215546Sopenharmony_ci assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR)); 5206bf215546Sopenharmony_ci 5207bf215546Sopenharmony_ci if (!cmd_buffer->device->use_global_bo_list) { 5208bf215546Sopenharmony_ci for (unsigned j = 0; j < set->header.buffer_count; ++j) 5209bf215546Sopenharmony_ci if (set->descriptors[j]) 5210bf215546Sopenharmony_ci radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]); 5211bf215546Sopenharmony_ci } 5212bf215546Sopenharmony_ci 5213bf215546Sopenharmony_ci if (set->header.bo) 5214bf215546Sopenharmony_ci radv_cs_add_buffer(ws, cmd_buffer->cs, set->header.bo); 5215bf215546Sopenharmony_ci} 5216bf215546Sopenharmony_ci 5217bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 5218bf215546Sopenharmony_ciradv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint, 5219bf215546Sopenharmony_ci VkPipelineLayout _layout, uint32_t firstSet, uint32_t descriptorSetCount, 5220bf215546Sopenharmony_ci const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount, 5221bf215546Sopenharmony_ci const uint32_t *pDynamicOffsets) 5222bf215546Sopenharmony_ci{ 5223bf215546Sopenharmony_ci 
RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5224bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout); 5225bf215546Sopenharmony_ci unsigned dyn_idx = 0; 5226bf215546Sopenharmony_ci 5227bf215546Sopenharmony_ci const bool no_dynamic_bounds = 5228bf215546Sopenharmony_ci cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS; 5229bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state = 5230bf215546Sopenharmony_ci radv_get_descriptors_state(cmd_buffer, pipelineBindPoint); 5231bf215546Sopenharmony_ci 5232bf215546Sopenharmony_ci for (unsigned i = 0; i < descriptorSetCount; ++i) { 5233bf215546Sopenharmony_ci unsigned set_idx = i + firstSet; 5234bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]); 5235bf215546Sopenharmony_ci 5236bf215546Sopenharmony_ci if (!set) { 5237bf215546Sopenharmony_ci /* From the Vulkan spec 1.3.211: 5238bf215546Sopenharmony_ci * 5239bf215546Sopenharmony_ci * "VUID-vkCmdBindDescriptorSets-layout-06564 5240bf215546Sopenharmony_ci * If layout was not created with VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT, each 5241bf215546Sopenharmony_ci * element of pDescriptorSets must be a valid VkDescriptorSet" 5242bf215546Sopenharmony_ci */ 5243bf215546Sopenharmony_ci assert(layout->independent_sets); 5244bf215546Sopenharmony_ci continue; 5245bf215546Sopenharmony_ci } 5246bf215546Sopenharmony_ci 5247bf215546Sopenharmony_ci /* If the set is already bound we only need to update the 5248bf215546Sopenharmony_ci * (potentially changed) dynamic offsets. 
*/ 5249bf215546Sopenharmony_ci if (descriptors_state->sets[set_idx] != set || 5250bf215546Sopenharmony_ci !(descriptors_state->valid & (1u << set_idx))) { 5251bf215546Sopenharmony_ci radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx); 5252bf215546Sopenharmony_ci } 5253bf215546Sopenharmony_ci 5254bf215546Sopenharmony_ci for (unsigned j = 0; j < set->header.layout->dynamic_offset_count; ++j, ++dyn_idx) { 5255bf215546Sopenharmony_ci unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start; 5256bf215546Sopenharmony_ci uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4; 5257bf215546Sopenharmony_ci assert(dyn_idx < dynamicOffsetCount); 5258bf215546Sopenharmony_ci 5259bf215546Sopenharmony_ci struct radv_descriptor_range *range = set->header.dynamic_descriptors + j; 5260bf215546Sopenharmony_ci 5261bf215546Sopenharmony_ci if (!range->va) { 5262bf215546Sopenharmony_ci memset(dst, 0, 4 * 4); 5263bf215546Sopenharmony_ci } else { 5264bf215546Sopenharmony_ci uint64_t va = range->va + pDynamicOffsets[dyn_idx]; 5265bf215546Sopenharmony_ci dst[0] = va; 5266bf215546Sopenharmony_ci dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32); 5267bf215546Sopenharmony_ci dst[2] = no_dynamic_bounds ? 
0xffffffffu : range->size;
            dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                     S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

            if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
               dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
                         S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
            } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
               dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                         S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
            } else {
               dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
            }
         }

         cmd_buffer->push_constant_stages |= set->header.layout->dynamic_shader_stages;
      }
   }
}

/* Prepare the push descriptor set `set` of the given bind point for use with
 * `layout`.
 *
 * Records the layout's size in the set header, swaps the layout reference held
 * by the set when the layout changes, and grows the host allocation behind
 * set->header.mapped_ptr when the tracked capacity is smaller than the set.
 *
 * Returns true on success. On allocation failure the tracked capacity is reset
 * to 0, cmd_buffer->record_result is set to VK_ERROR_OUT_OF_HOST_MEMORY and
 * false is returned.
 */
static bool
radv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set,
                              struct radv_descriptor_set_layout *layout,
                              VkPipelineBindPoint bind_point)
{
   struct radv_descriptor_state *descriptors_state =
      radv_get_descriptors_state(cmd_buffer, bind_point);
   set->header.size = layout->size;

   /* Drop the reference on the previous layout (if any) and take one on the new layout. */
   if (set->header.layout != layout) {
      if (set->header.layout)
         vk_descriptor_set_layout_unref(&cmd_buffer->device->vk, &set->header.layout->vk);
      vk_descriptor_set_layout_ref(&layout->vk);
      set->header.layout = layout;
   }

   if (descriptors_state->push_set.capacity < set->header.size) {
      /* Grow to at least 1024 bytes and at least twice the previous capacity,
       * capped at 96 * MAX_PUSH_DESCRIPTORS.
       */
      size_t new_size = MAX2(set->header.size, 1024);
      new_size = MAX2(new_size, 2 * descriptors_state->push_set.capacity);
      new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS);

      /* The old storage is released rather than reallocated, so its contents are not kept. */
      free(set->header.mapped_ptr);
      set->header.mapped_ptr = malloc(new_size);

      if (!set->header.mapped_ptr) {
         descriptors_state->push_set.capacity = 0;
         cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
         return false;
      }

      descriptors_state->push_set.capacity = new_size;
   }

   return true;
}

/* Push descriptors using cmd_buffer->meta_push_descriptors as the set.
 *
 * Only set index 0 is accepted (asserted) and the set layout must have been
 * created with the push-descriptor flag (asserted). Storage for the descriptors
 * is taken from the command buffer's upload allocation; the set's GPU address
 * is the upload BO address plus the returned offset. The writes are then
 * applied and the set is bound at index `set` for `pipelineBindPoint`.
 *
 * Returns without doing anything further if the upload allocation fails.
 */
void
radv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
                              VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
                              uint32_t set, uint32_t descriptorWriteCount,
                              const VkWriteDescriptorSet *pDescriptorWrites)
{
   RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
   struct radv_descriptor_set *push_set =
      (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors;
   unsigned bo_offset;

   assert(set == 0);
   assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);

   push_set->header.size = layout->set[set].layout->size;
   push_set->header.layout = layout->set[set].layout;

   if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size, &bo_offset,
                                     (void **)&push_set->header.mapped_ptr))
      return;

   push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
   push_set->header.va += bo_offset;

   radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
                                   radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
                                   pDescriptorWrites, 0, NULL);

   radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
}

/* vkCmdPushDescriptorSetKHR entry point.
 *
 * Uses the push set stored in the descriptor state of `pipelineBindPoint`,
 * (re)initializes it for the layout of set index `set`, applies the descriptor
 * writes, binds the set at index `set` and marks the push descriptors dirty.
 *
 * Returns early, leaving the binding untouched, if the push set could not be
 * initialized (radv_init_push_descriptor_set() reports the error).
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
                             VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
                             const VkWriteDescriptorSet *pDescriptorWrites)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
   struct radv_descriptor_state *descriptors_state =
      radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
   struct radv_descriptor_set *push_set =
      (struct radv_descriptor_set *)&descriptors_state->push_set.set;

   assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);

   if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
                                      pipelineBindPoint))
      return;

   /* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR()
    * because it is invalid, according to Vulkan spec.
    */
   for (int i = 0; i < descriptorWriteCount; i++) {
      ASSERTED const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
      assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
   }

   radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
                                   radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
                                   pDescriptorWrites, 0, NULL);

   radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
   descriptors_state->push_dirty = true;
}

/* vkCmdPushDescriptorSetWithTemplateKHR entry point.
 *
 * Same flow as radv_CmdPushDescriptorSetKHR(), except that the bind point is
 * taken from the update template (templ->bind_point) and the descriptors are
 * written from `pData` through the template.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer,
                                         VkDescriptorUpdateTemplate descriptorUpdateTemplate,
                                         VkPipelineLayout _layout, uint32_t set, const void *pData)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
   RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
   struct radv_descriptor_state *descriptors_state =
      radv_get_descriptors_state(cmd_buffer, templ->bind_point);
   struct radv_descriptor_set *push_set =
      (struct radv_descriptor_set *)&descriptors_state->push_set.set;

   assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);

   if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
                                      templ->bind_point))
      return;

   radv_cmd_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set,
                                                descriptorUpdateTemplate, pData);

   radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set);
   descriptors_state->push_dirty = true;
}

/* vkCmdPushConstants entry point.
 *
 * Copies `size` bytes from `pValues` into the command buffer's push constant
 * storage at byte offset `offset` and adds `stageFlags` to the set of stages
 * whose push constants are pending. `layout` is not used here; no bounds
 * checking is performed on `offset`/`size` in this function.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
                      VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
                      const void *pValues)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   memcpy(cmd_buffer->push_constants + offset, pValues, size);
   cmd_buffer->push_constant_stages |= stageFlags;
}

/* vkEndCommandBuffer entry point.
 *
 * For non-transfer queues, accumulates the flushes required at the end of the
 * command buffer, finalizes the internal compute command stream when one
 * exists, and emits the cache flush. For every queue it then waits for CP DMA
 * to go idle, frees the attachment and subpass sample-location state, and
 * finalizes the main command stream.
 *
 * Returns the error from finalizing either command stream (through vk_error),
 * otherwise marks the command buffer executable and returns the result
 * accumulated during recording (cmd_buffer->record_result).
 */
VKAPI_ATTR VkResult VKAPI_CALL
radv_EndCommandBuffer(VkCommandBuffer commandBuffer)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

   radv_emit_mip_change_flush_default(cmd_buffer);

   if (cmd_buffer->qf != RADV_QUEUE_TRANSFER) {
      if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX6)
         cmd_buffer->state.flush_bits |=
            RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;

      /* Make sure to sync all pending active queries at the end of
       * command buffer.
       */
      cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;

      /* Flush noncoherent images on GFX9+ so we can assume they're clean on the start of a
       * command buffer.
       */
      if (cmd_buffer->state.rb_noncoherent_dirty && can_skip_buffer_l2_flushes(cmd_buffer->device))
         cmd_buffer->state.flush_bits |= radv_src_access_flush(
            cmd_buffer,
            VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
            VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            NULL);

      /* Since NGG streamout uses GDS, we need to make GDS idle when
       * we leave the IB, otherwise another process might overwrite
       * it while our shaders are busy.
       */
      if (cmd_buffer->gds_needed)
         cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;

      /* Finalize the internal compute command stream, if it exists. */
      if (cmd_buffer->ace_internal.cs) {
         VkResult result = radv_ace_internal_finalize(cmd_buffer);
         /* Note: this early return happens before the state arrays below are freed. */
         if (result != VK_SUCCESS)
            return vk_error(cmd_buffer, result);
      }

      si_emit_cache_flush(cmd_buffer);
   }

   /* Make sure CP DMA is idle at the end of IBs because the kernel
    * doesn't wait for it.
    */
   si_cp_dma_wait_for_idle(cmd_buffer);

   radv_describe_end_cmd_buffer(cmd_buffer);

   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachments);
   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.subpass_sample_locs);

   VkResult result = cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs);
   if (result != VK_SUCCESS)
      return vk_error(cmd_buffer, result);

   /* Only reached when the main command stream was finalized successfully. */
   cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE;

   return cmd_buffer->record_result;
}

/* Emit the command stream of a compute pipeline into the command buffer.
 *
 * Does nothing when `pipeline` is already the last emitted compute pipeline.
 * Otherwise records it as emitted, copies its pre-built dwords
 * (pipeline->base.cs) into the command stream, raises the command buffer's
 * compute scratch requirements to cover this pipeline, adds the pipeline's
 * slab BO to the command stream's buffer list and, when a trace BO is
 * present on the device, saves the pipeline via radv_save_pipeline().
 */
static void
radv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer,
                           struct radv_compute_pipeline *pipeline)
{
   if (pipeline == cmd_buffer->state.emitted_compute_pipeline)
      return;

   /* A compute pipeline is expected to carry no dwords in ctx_cs. */
   assert(!pipeline->base.ctx_cs.cdw);

   cmd_buffer->state.emitted_compute_pipeline = pipeline;

   radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.cs.cdw);
   radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw);

   cmd_buffer->compute_scratch_size_per_wave_needed =
      MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, pipeline->base.scratch_bytes_per_wave);
   cmd_buffer->compute_scratch_waves_wanted =
      MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->base.max_waves);

   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.slab_bo);

   if (unlikely(cmd_buffer->device->trace_bo))
      radv_save_pipeline(cmd_buffer, &pipeline->base);
}

/* Flag every currently valid descriptor set of `bind_point` as dirty. */
static void
radv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
{
   struct radv_descriptor_state *descriptors_state =
      radv_get_descriptors_state(cmd_buffer, bind_point);

   descriptors_state->dirty |= descriptors_state->valid;
}

/* vkCmdBindPipeline entry point.
 *
 * Binding the pipeline that is already bound for the bind point is a no-op.
 * Otherwise the valid descriptor sets of that bind point are marked dirty, the
 * new pipeline is recorded in the command buffer state and the per-bind-point
 * bookkeeping below is updated (push constant stages, ring/scratch needs,
 * dirty bits, ...).
 *
 * Only the graphics path tolerates a NULL pipeline: it stores NULL as the
 * bound graphics pipeline and stops there.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
                     VkPipeline _pipeline)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);

   switch (pipelineBindPoint) {
   case VK_PIPELINE_BIND_POINT_COMPUTE: {
      struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);

      if (cmd_buffer->state.compute_pipeline == compute_pipeline)
         return;
      radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);

      cmd_buffer->state.compute_pipeline = compute_pipeline;
      cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
      cmd_buffer->task_rings_needed |=
         pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.uses_task_rings;
      break;
   }
   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
      /* Ray tracing pipelines are handled through the compute pipeline type here. */
      struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);

      if (cmd_buffer->state.rt_pipeline == compute_pipeline)
         return;
      radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);

      cmd_buffer->state.rt_pipeline = compute_pipeline;
      cmd_buffer->push_constant_stages |= RADV_RT_STAGE_BITS;
      /* Re-apply the stack size stored in the command buffer state for pipelines
       * that use a dynamic stack size.
       */
      if (compute_pipeline->dynamic_stack_size)
         radv_set_rt_stack_size(cmd_buffer, cmd_buffer->state.rt_stack_size);
      break;
   }
   case VK_PIPELINE_BIND_POINT_GRAPHICS: {
      struct radv_graphics_pipeline *graphics_pipeline =
         pipeline ? radv_pipeline_to_graphics(pipeline) : NULL;

      if (cmd_buffer->state.graphics_pipeline == graphics_pipeline)
         return;
      radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);

      /* Must be computed before the bound pipeline is overwritten below, since it
       * compares the previous pipeline against the new one.
       */
      bool vtx_emit_count_changed =
         !pipeline || !cmd_buffer->state.graphics_pipeline ||
         cmd_buffer->state.graphics_pipeline->vtx_emit_num != graphics_pipeline->vtx_emit_num ||
         cmd_buffer->state.graphics_pipeline->vtx_base_sgpr != graphics_pipeline->vtx_base_sgpr;
      cmd_buffer->state.graphics_pipeline = graphics_pipeline;
      if (!pipeline)
         break;

      bool mesh_shading = radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_MESH);
      if (mesh_shading != cmd_buffer->state.mesh_shading) {
         /* Re-emit VRS state because the combiner is different (vertex vs primitive).
          * Re-emit primitive topology because the mesh shading pipeline clobbered it.
          */
         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE |
                                    RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
      }

      cmd_buffer->state.mesh_shading = mesh_shading;
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
      cmd_buffer->push_constant_stages |= graphics_pipeline->active_stages;

      /* the new vertex shader might not have the same user regs */
      if (vtx_emit_count_changed) {
         cmd_buffer->state.last_first_instance = -1;
         cmd_buffer->state.last_vertex_offset = -1;
         cmd_buffer->state.last_drawid = -1;
      }

      /* Prefetch all pipeline shaders at first draw time. */
      cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS;

      if (cmd_buffer->device->physical_device->rad_info.has_vgt_flush_ngg_legacy_bug &&
          cmd_buffer->state.emitted_graphics_pipeline &&
          cmd_buffer->state.emitted_graphics_pipeline->is_ngg &&
          !cmd_buffer->state.graphics_pipeline->is_ngg) {
         /* Transitioning from NGG to legacy GS requires
          * VGT_FLUSH on GFX10 and Navi21. VGT_FLUSH
          * is also emitted at the beginning of IBs when legacy
          * GS ring pointers are set.
          */
         cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
      }

      radv_bind_dynamic_state(cmd_buffer, &graphics_pipeline->dynamic_state);

      /* Track the largest ES/GS and GS/VS ring sizes requested by any bound pipeline. */
      if (graphics_pipeline->esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
         cmd_buffer->esgs_ring_size_needed = graphics_pipeline->esgs_ring_size;
      if (graphics_pipeline->gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
         cmd_buffer->gsvs_ring_size_needed = graphics_pipeline->gsvs_ring_size;

      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TESS_CTRL))
         cmd_buffer->tess_rings_needed = true;
      if (mesh_shading)
         cmd_buffer->mesh_scratch_ring_needed |=
            pipeline->shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring;

      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TASK)) {
         /* Pipelines with a task stage need the internal compute command stream;
          * create it on first use and bail out if that fails.
          */
         if (!cmd_buffer->ace_internal.cs) {
            cmd_buffer->ace_internal.cs = radv_ace_internal_create(cmd_buffer);
            if (!cmd_buffer->ace_internal.cs)
               return;
         }

         cmd_buffer->task_rings_needed = true;
      }
      break;
   }
   default:
      assert(!"invalid bind point");
      break;
   }
}

/* vkCmdSetViewport entry point.
 *
 * Stores `viewportCount` viewports starting at index `firstViewport` in the
 * dynamic state, raises the tracked viewport count to
 * firstViewport + viewportCount when that is larger (it is never lowered
 * here), recomputes the scale/translate transform of each written viewport
 * and flags the viewport state dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
                    const VkViewport *pViewports)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;
   ASSERTED const uint32_t total_count = firstViewport + viewportCount;

   assert(firstViewport < MAX_VIEWPORTS);
   assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);

   if (state->dynamic.viewport.count < total_count)
      state->dynamic.viewport.count = total_count;

   memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
          viewportCount * sizeof(*pViewports));
   for (unsigned i = 0; i < viewportCount; i++) {
      radv_get_viewport_xform(&pViewports[i],
                              state->dynamic.viewport.xform[i + firstViewport].scale,
                              state->dynamic.viewport.xform[i + firstViewport].translate);
   }

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT;
}

/* vkCmdSetScissor entry point.
 *
 * Stores `scissorCount` scissor rectangles starting at index `firstScissor`
 * in the dynamic state, raises the tracked scissor count to
 * firstScissor + scissorCount when that is larger, and flags the scissor
 * state dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
                   const VkRect2D *pScissors)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;
   ASSERTED const uint32_t total_count = firstScissor + scissorCount;

   assert(firstScissor < MAX_SCISSORS);
   assert(total_count >= 1 && total_count <= MAX_SCISSORS);

   if (state->dynamic.scissor.count < total_count)
      state->dynamic.scissor.count = total_count;

   memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors,
          scissorCount * sizeof(*pScissors));

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
}

/* vkCmdSetLineWidth entry point.
 *
 * Stores the dynamic line width and flags the line-width state dirty. When the
 * value actually changes, the scissor state is flagged dirty as well.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

   /* NOTE(review): presumably the emitted scissor state depends on the line
    * width, hence the SCISSOR bit here - confirm against the scissor emit code.
    */
   if (cmd_buffer->state.dynamic.line_width != lineWidth)
      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;

   cmd_buffer->state.dynamic.line_width = lineWidth;
   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
}

/* vkCmdSetDepthBias entry point: stores the constant factor, clamp and slope
 * factor in the dynamic state and flags the depth bias state dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,
                     float depthBiasClamp, float depthBiasSlopeFactor)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   state->dynamic.depth_bias.bias = depthBiasConstantFactor;
   state->dynamic.depth_bias.clamp = depthBiasClamp;
   state->dynamic.depth_bias.slope = depthBiasSlopeFactor;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
}

/* vkCmdSetBlendConstants entry point: copies the four blend constants into the
 * dynamic state and flags the blend constants dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4);

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
}

/* vkCmdSetDepthBounds entry point: stores the min/max depth bounds in the
 * dynamic state and flags the depth bounds dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   state->dynamic.depth_bounds.min = minDepthBounds;
   state->dynamic.depth_bounds.max = maxDepthBounds;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
}

/* vkCmdSetStencilCompareMask entry point: stores `compareMask` for the faces
 * selected by `faceMask` (front and/or back) and flags the stencil compare
 * mask dirty. The dirty bit is set even when `faceMask` selects no face.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
                              uint32_t compareMask)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      state->dynamic.stencil_compare_mask.front = compareMask;
   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      state->dynamic.stencil_compare_mask.back = compareMask;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
}

/* vkCmdSetStencilWriteMask entry point: stores `writeMask` for the faces
 * selected by `faceMask` (front and/or back) and flags the stencil write mask
 * dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
                            uint32_t writeMask)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      state->dynamic.stencil_write_mask.front = writeMask;
   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      state->dynamic.stencil_write_mask.back = writeMask;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
}

/* vkCmdSetStencilReference entry point: stores `reference` for the faces
 * selected by `faceMask` (front and/or back) and flags the stencil reference
 * dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
                            uint32_t reference)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);

   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
      cmd_buffer->state.dynamic.stencil_reference.front = reference;
   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
      cmd_buffer->state.dynamic.stencil_reference.back = reference;

   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
}

/* vkCmdSetDiscardRectangleEXT entry point: copies `discardRectangleCount`
 * rectangles into the dynamic state starting at index `firstDiscardRectangle`
 * and flags the discard rectangles dirty. The index range is only checked by
 * assertions.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetDiscardRectangleEXT(VkCommandBuffer commandBuffer, uint32_t firstDiscardRectangle,
                               uint32_t discardRectangleCount, const VkRect2D *pDiscardRectangles)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;
   ASSERTED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount;

   assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES);
   assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES);

   typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle],
                pDiscardRectangles, discardRectangleCount);

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
}

/* vkCmdSetSampleLocationsEXT entry point: copies the per-pixel sample count,
 * grid size, location count and the locations themselves from
 * `pSampleLocationsInfo` into the dynamic state and flags the sample locations
 * dirty. The location count is only checked against MAX_SAMPLE_LOCATIONS by an
 * assertion.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
                              const VkSampleLocationsInfoEXT *pSampleLocationsInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);

   state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
   state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
   state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
   typed_memcpy(&state->dynamic.sample_location.locations[0],
                pSampleLocationsInfo->pSampleLocations, pSampleLocationsInfo->sampleLocationsCount);

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
}

/* vkCmdSetLineStippleEXT entry point: stores the stipple factor and pattern in
 * the dynamic state and flags the line stipple dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor,
                          uint16_t lineStipplePattern)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   state->dynamic.line_stipple.factor = lineStippleFactor;
   state->dynamic.line_stipple.pattern = lineStipplePattern;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
}

/* vkCmdSetCullMode entry point: stores the cull mode in the dynamic state and
 * flags it dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetCullMode(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   state->dynamic.cull_mode = cullMode;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE;
}

/* vkCmdSetFrontFace entry point: stores the front face orientation in the
 * dynamic state and flags it dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetFrontFace(VkCommandBuffer commandBuffer, VkFrontFace frontFace)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   state->dynamic.front_face = frontFace;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
}

/* vkCmdSetPrimitiveTopology entry point.
 *
 * The Vulkan topology is translated with si_translate_prim() and the
 * translated value is what gets stored in the dynamic state. Besides the
 * primitive topology bit, dependent state is flagged dirty when the change
 * crosses one of the boundaries checked below.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetPrimitiveTopology(VkCommandBuffer commandBuffer, VkPrimitiveTopology primitiveTopology)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;
   unsigned primitive_topology = si_translate_prim(primitiveTopology);

   /* Switching to or from line strips re-dirties the line stipple state. */
   if ((state->dynamic.primitive_topology == V_008958_DI_PT_LINESTRIP) !=
       (primitive_topology == V_008958_DI_PT_LINESTRIP))
      state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;

   /* Switching between points/lines and other primitives re-dirties the scissor state. */
   if (radv_prim_is_points_or_lines(state->dynamic.primitive_topology) !=
       radv_prim_is_points_or_lines(primitive_topology))
      state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;

   state->dynamic.primitive_topology = primitive_topology;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
}

/* vkCmdSetViewportWithCount entry point: forwards to radv_CmdSetViewport()
 * with a first viewport index of 0.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetViewportWithCount(VkCommandBuffer commandBuffer, uint32_t viewportCount,
                             const VkViewport *pViewports)
{
   radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports);
}

/* vkCmdSetScissorWithCount entry point: forwards to radv_CmdSetScissor() with
 * a first scissor index of 0.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetScissorWithCount(VkCommandBuffer commandBuffer, uint32_t scissorCount,
                            const VkRect2D *pScissors)
{
   radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors);
}

/* vkCmdSetDepthTestEnable entry point: stores the depth test enable in the
 * dynamic state and flags it dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetDepthTestEnable(VkCommandBuffer commandBuffer, VkBool32 depthTestEnable)

{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   state->dynamic.depth_test_enable = depthTestEnable;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
}

/* vkCmdSetDepthWriteEnable entry point: stores the depth write enable in the
 * dynamic state and flags it dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetDepthWriteEnable(VkCommandBuffer commandBuffer, VkBool32 depthWriteEnable)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   state->dynamic.depth_write_enable = depthWriteEnable;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
}

/* vkCmdSetDepthCompareOp entry point: stores the depth compare op in the
 * dynamic state and flags it dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetDepthCompareOp(VkCommandBuffer commandBuffer, VkCompareOp depthCompareOp)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   state->dynamic.depth_compare_op = depthCompareOp;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
}

/* vkCmdSetDepthBoundsTestEnable entry point: stores the depth bounds test
 * enable in the dynamic state and flags it dirty.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdSetDepthBoundsTestEnable(VkCommandBuffer commandBuffer, VkBool32 depthBoundsTestEnable)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_cmd_state *state = &cmd_buffer->state;

   state->dynamic.depth_bounds_test_enable = depthBoundsTestEnable;

   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdSetStencilTestEnable(VkCommandBuffer commandBuffer, VkBool32 stencilTestEnable)
{
5927bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5928bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 5929bf215546Sopenharmony_ci 5930bf215546Sopenharmony_ci state->dynamic.stencil_test_enable = stencilTestEnable; 5931bf215546Sopenharmony_ci 5932bf215546Sopenharmony_ci state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE; 5933bf215546Sopenharmony_ci} 5934bf215546Sopenharmony_ci 5935bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 5936bf215546Sopenharmony_ciradv_CmdSetStencilOp(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask, 5937bf215546Sopenharmony_ci VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp, 5938bf215546Sopenharmony_ci VkCompareOp compareOp) 5939bf215546Sopenharmony_ci{ 5940bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5941bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 5942bf215546Sopenharmony_ci 5943bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_FRONT_BIT) { 5944bf215546Sopenharmony_ci state->dynamic.stencil_op.front.fail_op = failOp; 5945bf215546Sopenharmony_ci state->dynamic.stencil_op.front.pass_op = passOp; 5946bf215546Sopenharmony_ci state->dynamic.stencil_op.front.depth_fail_op = depthFailOp; 5947bf215546Sopenharmony_ci state->dynamic.stencil_op.front.compare_op = compareOp; 5948bf215546Sopenharmony_ci } 5949bf215546Sopenharmony_ci 5950bf215546Sopenharmony_ci if (faceMask & VK_STENCIL_FACE_BACK_BIT) { 5951bf215546Sopenharmony_ci state->dynamic.stencil_op.back.fail_op = failOp; 5952bf215546Sopenharmony_ci state->dynamic.stencil_op.back.pass_op = passOp; 5953bf215546Sopenharmony_ci state->dynamic.stencil_op.back.depth_fail_op = depthFailOp; 5954bf215546Sopenharmony_ci state->dynamic.stencil_op.back.compare_op = compareOp; 5955bf215546Sopenharmony_ci } 5956bf215546Sopenharmony_ci 5957bf215546Sopenharmony_ci state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP; 
5958bf215546Sopenharmony_ci} 5959bf215546Sopenharmony_ci 5960bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 5961bf215546Sopenharmony_ciradv_CmdSetFragmentShadingRateKHR(VkCommandBuffer commandBuffer, const VkExtent2D *pFragmentSize, 5962bf215546Sopenharmony_ci const VkFragmentShadingRateCombinerOpKHR combinerOps[2]) 5963bf215546Sopenharmony_ci{ 5964bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5965bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 5966bf215546Sopenharmony_ci 5967bf215546Sopenharmony_ci state->dynamic.fragment_shading_rate.size = *pFragmentSize; 5968bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) 5969bf215546Sopenharmony_ci state->dynamic.fragment_shading_rate.combiner_ops[i] = combinerOps[i]; 5970bf215546Sopenharmony_ci 5971bf215546Sopenharmony_ci state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE; 5972bf215546Sopenharmony_ci} 5973bf215546Sopenharmony_ci 5974bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 5975bf215546Sopenharmony_ciradv_CmdSetDepthBiasEnable(VkCommandBuffer commandBuffer, VkBool32 depthBiasEnable) 5976bf215546Sopenharmony_ci{ 5977bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5978bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 5979bf215546Sopenharmony_ci 5980bf215546Sopenharmony_ci state->dynamic.depth_bias_enable = depthBiasEnable; 5981bf215546Sopenharmony_ci 5982bf215546Sopenharmony_ci state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE; 5983bf215546Sopenharmony_ci} 5984bf215546Sopenharmony_ci 5985bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 5986bf215546Sopenharmony_ciradv_CmdSetPrimitiveRestartEnable(VkCommandBuffer commandBuffer, VkBool32 primitiveRestartEnable) 5987bf215546Sopenharmony_ci{ 5988bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 5989bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 
5990bf215546Sopenharmony_ci 5991bf215546Sopenharmony_ci state->dynamic.primitive_restart_enable = primitiveRestartEnable; 5992bf215546Sopenharmony_ci 5993bf215546Sopenharmony_ci state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE; 5994bf215546Sopenharmony_ci} 5995bf215546Sopenharmony_ci 5996bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 5997bf215546Sopenharmony_ciradv_CmdSetRasterizerDiscardEnable(VkCommandBuffer commandBuffer, VkBool32 rasterizerDiscardEnable) 5998bf215546Sopenharmony_ci{ 5999bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 6000bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6001bf215546Sopenharmony_ci 6002bf215546Sopenharmony_ci state->dynamic.rasterizer_discard_enable = rasterizerDiscardEnable; 6003bf215546Sopenharmony_ci 6004bf215546Sopenharmony_ci state->dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE; 6005bf215546Sopenharmony_ci} 6006bf215546Sopenharmony_ci 6007bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 6008bf215546Sopenharmony_ciradv_CmdSetPatchControlPointsEXT(VkCommandBuffer commandBuffer, uint32_t patchControlPoints) 6009bf215546Sopenharmony_ci{ 6010bf215546Sopenharmony_ci /* not implemented */ 6011bf215546Sopenharmony_ci} 6012bf215546Sopenharmony_ci 6013bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 6014bf215546Sopenharmony_ciradv_CmdSetLogicOpEXT(VkCommandBuffer commandBuffer, VkLogicOp logicOp) 6015bf215546Sopenharmony_ci{ 6016bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 6017bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6018bf215546Sopenharmony_ci unsigned logic_op = si_translate_blend_logic_op(logicOp); 6019bf215546Sopenharmony_ci 6020bf215546Sopenharmony_ci state->dynamic.logic_op = logic_op; 6021bf215546Sopenharmony_ci 6022bf215546Sopenharmony_ci state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP; 6023bf215546Sopenharmony_ci} 6024bf215546Sopenharmony_ci 
6025bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 6026bf215546Sopenharmony_ciradv_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer, uint32_t attachmentCount, 6027bf215546Sopenharmony_ci const VkBool32 *pColorWriteEnables) 6028bf215546Sopenharmony_ci{ 6029bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 6030bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6031bf215546Sopenharmony_ci uint32_t color_write_enable = 0; 6032bf215546Sopenharmony_ci 6033bf215546Sopenharmony_ci assert(attachmentCount <= MAX_RTS); 6034bf215546Sopenharmony_ci 6035bf215546Sopenharmony_ci for (uint32_t i = 0; i < attachmentCount; i++) { 6036bf215546Sopenharmony_ci color_write_enable |= pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0; 6037bf215546Sopenharmony_ci } 6038bf215546Sopenharmony_ci 6039bf215546Sopenharmony_ci state->dynamic.color_write_enable = color_write_enable; 6040bf215546Sopenharmony_ci 6041bf215546Sopenharmony_ci state->dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE; 6042bf215546Sopenharmony_ci} 6043bf215546Sopenharmony_ci 6044bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 6045bf215546Sopenharmony_ciradv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingDescriptionCount, 6046bf215546Sopenharmony_ci const VkVertexInputBindingDescription2EXT *pVertexBindingDescriptions, 6047bf215546Sopenharmony_ci uint32_t vertexAttributeDescriptionCount, 6048bf215546Sopenharmony_ci const VkVertexInputAttributeDescription2EXT *pVertexAttributeDescriptions) 6049bf215546Sopenharmony_ci{ 6050bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 6051bf215546Sopenharmony_ci struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input; 6052bf215546Sopenharmony_ci 6053bf215546Sopenharmony_ci const VkVertexInputBindingDescription2EXT *bindings[MAX_VBS]; 6054bf215546Sopenharmony_ci for (unsigned i = 0; i < vertexBindingDescriptionCount; i++) 
6055bf215546Sopenharmony_ci bindings[pVertexBindingDescriptions[i].binding] = &pVertexBindingDescriptions[i]; 6056bf215546Sopenharmony_ci 6057bf215546Sopenharmony_ci cmd_buffer->state.vbo_misaligned_mask = 0; 6058bf215546Sopenharmony_ci cmd_buffer->state.vbo_misaligned_mask_invalid = 0; 6059bf215546Sopenharmony_ci 6060bf215546Sopenharmony_ci memset(state, 0, sizeof(*state)); 6061bf215546Sopenharmony_ci state->bindings_match_attrib = true; 6062bf215546Sopenharmony_ci 6063bf215546Sopenharmony_ci enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level; 6064bf215546Sopenharmony_ci for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) { 6065bf215546Sopenharmony_ci const VkVertexInputAttributeDescription2EXT *attrib = &pVertexAttributeDescriptions[i]; 6066bf215546Sopenharmony_ci const VkVertexInputBindingDescription2EXT *binding = bindings[attrib->binding]; 6067bf215546Sopenharmony_ci unsigned loc = attrib->location; 6068bf215546Sopenharmony_ci 6069bf215546Sopenharmony_ci state->attribute_mask |= 1u << loc; 6070bf215546Sopenharmony_ci state->bindings[loc] = attrib->binding; 6071bf215546Sopenharmony_ci if (attrib->binding != loc) 6072bf215546Sopenharmony_ci state->bindings_match_attrib = false; 6073bf215546Sopenharmony_ci if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) { 6074bf215546Sopenharmony_ci state->instance_rate_inputs |= 1u << loc; 6075bf215546Sopenharmony_ci state->divisors[loc] = binding->divisor; 6076bf215546Sopenharmony_ci if (binding->divisor == 0) { 6077bf215546Sopenharmony_ci state->zero_divisors |= 1u << loc; 6078bf215546Sopenharmony_ci } else if (binding->divisor > 1) { 6079bf215546Sopenharmony_ci state->nontrivial_divisors |= 1u << loc; 6080bf215546Sopenharmony_ci } 6081bf215546Sopenharmony_ci } 6082bf215546Sopenharmony_ci cmd_buffer->vertex_bindings[attrib->binding].stride = binding->stride; 6083bf215546Sopenharmony_ci state->offsets[loc] = attrib->offset; 6084bf215546Sopenharmony_ci 
6085bf215546Sopenharmony_ci struct dynamic_vertex_format_cache *found = NULL; 6086bf215546Sopenharmony_ci util_dynarray_foreach(&cmd_buffer->cached_vertex_formats, 6087bf215546Sopenharmony_ci struct dynamic_vertex_format_cache, 6088bf215546Sopenharmony_ci vf) { 6089bf215546Sopenharmony_ci if (vf->format == attrib->format) { 6090bf215546Sopenharmony_ci found = vf; 6091bf215546Sopenharmony_ci break; 6092bf215546Sopenharmony_ci } 6093bf215546Sopenharmony_ci } 6094bf215546Sopenharmony_ci if (!found) { 6095bf215546Sopenharmony_ci unsigned nfmt, dfmt; 6096bf215546Sopenharmony_ci bool post_shuffle; 6097bf215546Sopenharmony_ci enum radv_vs_input_alpha_adjust alpha_adjust; 6098bf215546Sopenharmony_ci const struct util_format_description *format_desc = vk_format_description(attrib->format); 6099bf215546Sopenharmony_ci 6100bf215546Sopenharmony_ci found = util_dynarray_grow(&cmd_buffer->cached_vertex_formats, 6101bf215546Sopenharmony_ci struct dynamic_vertex_format_cache, 1); 6102bf215546Sopenharmony_ci radv_translate_vertex_format(cmd_buffer->device->physical_device, attrib->format, format_desc, 6103bf215546Sopenharmony_ci &dfmt, &nfmt, &post_shuffle, &alpha_adjust); 6104bf215546Sopenharmony_ci found->format = attrib->format; 6105bf215546Sopenharmony_ci found->hw_fmt = dfmt | (nfmt << 4); 6106bf215546Sopenharmony_ci const uint8_t format_align_req_minus_1 = format_desc->channel[0].size >= 32 ? 
3 : 6107bf215546Sopenharmony_ci (format_desc->block.bits / 8u - 1); 6108bf215546Sopenharmony_ci found->fmt_align_req_minus_1 = format_align_req_minus_1; 6109bf215546Sopenharmony_ci found->fmt_size = format_desc->block.bits / 8u; 6110bf215546Sopenharmony_ci found->post_shuffle = post_shuffle; 6111bf215546Sopenharmony_ci found->alpha_adjust_lo = alpha_adjust & 0x1; 6112bf215546Sopenharmony_ci found->alpha_adjust_hi = (alpha_adjust >> 1) & 0x1; 6113bf215546Sopenharmony_ci } 6114bf215546Sopenharmony_ci 6115bf215546Sopenharmony_ci state->formats[loc] = found->hw_fmt; 6116bf215546Sopenharmony_ci state->format_align_req_minus_1[loc] = found->fmt_align_req_minus_1; 6117bf215546Sopenharmony_ci state->format_sizes[loc] = found->fmt_size; 6118bf215546Sopenharmony_ci state->alpha_adjust_lo |= found->alpha_adjust_lo << loc; 6119bf215546Sopenharmony_ci state->alpha_adjust_hi |= found->alpha_adjust_hi << loc; 6120bf215546Sopenharmony_ci if (found->post_shuffle) 6121bf215546Sopenharmony_ci state->post_shuffle |= 1u << loc; 6122bf215546Sopenharmony_ci 6123bf215546Sopenharmony_ci if ((chip == GFX6 || chip >= GFX10) && 6124bf215546Sopenharmony_ci cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(attrib->binding)) { 6125bf215546Sopenharmony_ci if (binding->stride & found->fmt_align_req_minus_1) { 6126bf215546Sopenharmony_ci cmd_buffer->state.vbo_misaligned_mask |= BITFIELD_BIT(loc); 6127bf215546Sopenharmony_ci } else if ((cmd_buffer->vertex_bindings[attrib->binding].offset + state->offsets[loc]) & 6128bf215546Sopenharmony_ci found->fmt_align_req_minus_1) { 6129bf215546Sopenharmony_ci cmd_buffer->state.vbo_misaligned_mask |= BITFIELD_BIT(loc); 6130bf215546Sopenharmony_ci } 6131bf215546Sopenharmony_ci } 6132bf215546Sopenharmony_ci } 6133bf215546Sopenharmony_ci 6134bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER | 6135bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT; 6136bf215546Sopenharmony_ci} 6137bf215546Sopenharmony_ci 
6138bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 6139bf215546Sopenharmony_ciradv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount, 6140bf215546Sopenharmony_ci const VkCommandBuffer *pCmdBuffers) 6141bf215546Sopenharmony_ci{ 6142bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer); 6143bf215546Sopenharmony_ci 6144bf215546Sopenharmony_ci assert(commandBufferCount > 0); 6145bf215546Sopenharmony_ci 6146bf215546Sopenharmony_ci radv_emit_mip_change_flush_default(primary); 6147bf215546Sopenharmony_ci 6148bf215546Sopenharmony_ci /* Emit pending flushes on primary prior to executing secondary */ 6149bf215546Sopenharmony_ci si_emit_cache_flush(primary); 6150bf215546Sopenharmony_ci 6151bf215546Sopenharmony_ci /* Make sure CP DMA is idle on primary prior to executing secondary. */ 6152bf215546Sopenharmony_ci si_cp_dma_wait_for_idle(primary); 6153bf215546Sopenharmony_ci 6154bf215546Sopenharmony_ci for (uint32_t i = 0; i < commandBufferCount; i++) { 6155bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]); 6156bf215546Sopenharmony_ci bool allow_ib2 = true; 6157bf215546Sopenharmony_ci 6158bf215546Sopenharmony_ci if (secondary->device->physical_device->rad_info.gfx_level == GFX7 && 6159bf215546Sopenharmony_ci secondary->state.uses_draw_indirect_multi) { 6160bf215546Sopenharmony_ci /* Do not launch an IB2 for secondary command buffers that contain 6161bf215546Sopenharmony_ci * DRAW_{INDEX}_INDIRECT_MULTI on GFX7 because it's illegal and hang the GPU. 6162bf215546Sopenharmony_ci */ 6163bf215546Sopenharmony_ci allow_ib2 = false; 6164bf215546Sopenharmony_ci } 6165bf215546Sopenharmony_ci 6166bf215546Sopenharmony_ci if (secondary->qf == RADV_QUEUE_COMPUTE) { 6167bf215546Sopenharmony_ci /* IB2 packets are not supported on compute queues according to PAL. 
*/ 6168bf215546Sopenharmony_ci allow_ib2 = false; 6169bf215546Sopenharmony_ci } 6170bf215546Sopenharmony_ci 6171bf215546Sopenharmony_ci primary->scratch_size_per_wave_needed = 6172bf215546Sopenharmony_ci MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed); 6173bf215546Sopenharmony_ci primary->scratch_waves_wanted = 6174bf215546Sopenharmony_ci MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted); 6175bf215546Sopenharmony_ci primary->compute_scratch_size_per_wave_needed = 6176bf215546Sopenharmony_ci MAX2(primary->compute_scratch_size_per_wave_needed, 6177bf215546Sopenharmony_ci secondary->compute_scratch_size_per_wave_needed); 6178bf215546Sopenharmony_ci primary->compute_scratch_waves_wanted = 6179bf215546Sopenharmony_ci MAX2(primary->compute_scratch_waves_wanted, secondary->compute_scratch_waves_wanted); 6180bf215546Sopenharmony_ci 6181bf215546Sopenharmony_ci if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed) 6182bf215546Sopenharmony_ci primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed; 6183bf215546Sopenharmony_ci if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed) 6184bf215546Sopenharmony_ci primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed; 6185bf215546Sopenharmony_ci if (secondary->tess_rings_needed) 6186bf215546Sopenharmony_ci primary->tess_rings_needed = true; 6187bf215546Sopenharmony_ci if (secondary->task_rings_needed) 6188bf215546Sopenharmony_ci primary->task_rings_needed = true; 6189bf215546Sopenharmony_ci if (secondary->mesh_scratch_ring_needed) 6190bf215546Sopenharmony_ci primary->mesh_scratch_ring_needed = true; 6191bf215546Sopenharmony_ci if (secondary->sample_positions_needed) 6192bf215546Sopenharmony_ci primary->sample_positions_needed = true; 6193bf215546Sopenharmony_ci if (secondary->gds_needed) 6194bf215546Sopenharmony_ci primary->gds_needed = true; 6195bf215546Sopenharmony_ci 6196bf215546Sopenharmony_ci if 
(!secondary->state.framebuffer && primary->state.pass && (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) { 6197bf215546Sopenharmony_ci /* Emit the framebuffer state from primary if secondary 6198bf215546Sopenharmony_ci * has been recorded without a framebuffer, otherwise 6199bf215546Sopenharmony_ci * fast color/depth clears can't work. 6200bf215546Sopenharmony_ci */ 6201bf215546Sopenharmony_ci radv_emit_fb_mip_change_flush(primary); 6202bf215546Sopenharmony_ci radv_emit_framebuffer_state(primary); 6203bf215546Sopenharmony_ci } 6204bf215546Sopenharmony_ci 6205bf215546Sopenharmony_ci if (secondary->ace_internal.cs) { 6206bf215546Sopenharmony_ci if (!primary->ace_internal.cs) { 6207bf215546Sopenharmony_ci primary->ace_internal.cs = radv_ace_internal_create(primary); 6208bf215546Sopenharmony_ci if (!primary->ace_internal.cs) 6209bf215546Sopenharmony_ci return; 6210bf215546Sopenharmony_ci } 6211bf215546Sopenharmony_ci 6212bf215546Sopenharmony_ci struct radeon_cmdbuf *ace_primary = primary->ace_internal.cs; 6213bf215546Sopenharmony_ci struct radeon_cmdbuf *ace_secondary = secondary->ace_internal.cs; 6214bf215546Sopenharmony_ci 6215bf215546Sopenharmony_ci /* Emit pending flushes on primary prior to executing secondary. */ 6216bf215546Sopenharmony_ci radv_ace_internal_cache_flush(primary); 6217bf215546Sopenharmony_ci 6218bf215546Sopenharmony_ci /* Wait for primary GFX->ACE semaphore, if necessary. */ 6219bf215546Sopenharmony_ci if (radv_flush_gfx2ace_semaphore(primary)) 6220bf215546Sopenharmony_ci radv_wait_gfx2ace_semaphore(primary); 6221bf215546Sopenharmony_ci 6222bf215546Sopenharmony_ci /* Execute the secondary compute cmdbuf. 6223bf215546Sopenharmony_ci * Don't use IB2 packets because they are not supported on compute queues. 
6224bf215546Sopenharmony_ci */ 6225bf215546Sopenharmony_ci primary->device->ws->cs_execute_secondary(ace_primary, ace_secondary, false); 6226bf215546Sopenharmony_ci } 6227bf215546Sopenharmony_ci 6228bf215546Sopenharmony_ci /* Update pending ACE internal flush bits from the secondary cmdbuf */ 6229bf215546Sopenharmony_ci primary->ace_internal.flush_bits |= secondary->ace_internal.flush_bits; 6230bf215546Sopenharmony_ci 6231bf215546Sopenharmony_ci /* Increment primary semaphore if secondary was dirty. 6232bf215546Sopenharmony_ci * This happens when the secondary cmdbuf has a barrier which 6233bf215546Sopenharmony_ci * isn't consumed by a draw call. 6234bf215546Sopenharmony_ci */ 6235bf215546Sopenharmony_ci if (radv_ace_internal_sem_dirty(secondary)) 6236bf215546Sopenharmony_ci primary->ace_internal.sem.gfx2ace_value++; 6237bf215546Sopenharmony_ci 6238bf215546Sopenharmony_ci primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs, allow_ib2); 6239bf215546Sopenharmony_ci 6240bf215546Sopenharmony_ci /* When the secondary command buffer is compute only we don't 6241bf215546Sopenharmony_ci * need to re-emit the current graphics pipeline. 6242bf215546Sopenharmony_ci */ 6243bf215546Sopenharmony_ci if (secondary->state.emitted_graphics_pipeline) { 6244bf215546Sopenharmony_ci primary->state.emitted_graphics_pipeline = secondary->state.emitted_graphics_pipeline; 6245bf215546Sopenharmony_ci } 6246bf215546Sopenharmony_ci 6247bf215546Sopenharmony_ci /* When the secondary command buffer is graphics only we don't 6248bf215546Sopenharmony_ci * need to re-emit the current compute pipeline. 6249bf215546Sopenharmony_ci */ 6250bf215546Sopenharmony_ci if (secondary->state.emitted_compute_pipeline) { 6251bf215546Sopenharmony_ci primary->state.emitted_compute_pipeline = secondary->state.emitted_compute_pipeline; 6252bf215546Sopenharmony_ci } 6253bf215546Sopenharmony_ci 6254bf215546Sopenharmony_ci /* Only re-emit the draw packets when needed. 
*/ 6255bf215546Sopenharmony_ci if (secondary->state.last_primitive_reset_en != -1) { 6256bf215546Sopenharmony_ci primary->state.last_primitive_reset_en = secondary->state.last_primitive_reset_en; 6257bf215546Sopenharmony_ci } 6258bf215546Sopenharmony_ci 6259bf215546Sopenharmony_ci if (secondary->state.last_primitive_reset_index) { 6260bf215546Sopenharmony_ci primary->state.last_primitive_reset_index = secondary->state.last_primitive_reset_index; 6261bf215546Sopenharmony_ci } 6262bf215546Sopenharmony_ci 6263bf215546Sopenharmony_ci if (secondary->state.last_ia_multi_vgt_param) { 6264bf215546Sopenharmony_ci primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param; 6265bf215546Sopenharmony_ci } 6266bf215546Sopenharmony_ci 6267bf215546Sopenharmony_ci primary->state.last_first_instance = secondary->state.last_first_instance; 6268bf215546Sopenharmony_ci primary->state.last_num_instances = secondary->state.last_num_instances; 6269bf215546Sopenharmony_ci primary->state.last_drawid = secondary->state.last_drawid; 6270bf215546Sopenharmony_ci primary->state.last_subpass_color_count = secondary->state.last_subpass_color_count; 6271bf215546Sopenharmony_ci primary->state.last_vertex_offset = secondary->state.last_vertex_offset; 6272bf215546Sopenharmony_ci primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert; 6273bf215546Sopenharmony_ci primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon; 6274bf215546Sopenharmony_ci primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control; 6275bf215546Sopenharmony_ci 6276bf215546Sopenharmony_ci if (secondary->state.last_index_type != -1) { 6277bf215546Sopenharmony_ci primary->state.last_index_type = secondary->state.last_index_type; 6278bf215546Sopenharmony_ci } 6279bf215546Sopenharmony_ci 6280bf215546Sopenharmony_ci primary->state.last_nggc_settings = secondary->state.last_nggc_settings; 6281bf215546Sopenharmony_ci 
primary->state.last_nggc_settings_sgpr_idx = secondary->state.last_nggc_settings_sgpr_idx; 6282bf215546Sopenharmony_ci primary->state.last_nggc_skip = secondary->state.last_nggc_skip; 6283bf215546Sopenharmony_ci 6284bf215546Sopenharmony_ci primary->state.last_vrs_rates = secondary->state.last_vrs_rates; 6285bf215546Sopenharmony_ci primary->state.last_vrs_rates_sgpr_idx = secondary->state.last_vrs_rates_sgpr_idx; 6286bf215546Sopenharmony_ci } 6287bf215546Sopenharmony_ci 6288bf215546Sopenharmony_ci /* After executing commands from secondary buffers we have to dirty 6289bf215546Sopenharmony_ci * some states. 6290bf215546Sopenharmony_ci */ 6291bf215546Sopenharmony_ci primary->state.dirty |= 6292bf215546Sopenharmony_ci RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_ALL; 6293bf215546Sopenharmony_ci radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS); 6294bf215546Sopenharmony_ci radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE); 6295bf215546Sopenharmony_ci} 6296bf215546Sopenharmony_ci 6297bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 6298bf215546Sopenharmony_ciradv_CreateCommandPool(VkDevice _device, const VkCommandPoolCreateInfo *pCreateInfo, 6299bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, VkCommandPool *pCmdPool) 6300bf215546Sopenharmony_ci{ 6301bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 6302bf215546Sopenharmony_ci struct radv_cmd_pool *pool; 6303bf215546Sopenharmony_ci 6304bf215546Sopenharmony_ci pool = 6305bf215546Sopenharmony_ci vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 6306bf215546Sopenharmony_ci if (pool == NULL) 6307bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 6308bf215546Sopenharmony_ci 6309bf215546Sopenharmony_ci VkResult result = vk_command_pool_init(&pool->vk, &device->vk, pCreateInfo, pAllocator); 6310bf215546Sopenharmony_ci if (result != 
VK_SUCCESS) { 6311bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pool); 6312bf215546Sopenharmony_ci return result; 6313bf215546Sopenharmony_ci } 6314bf215546Sopenharmony_ci 6315bf215546Sopenharmony_ci list_inithead(&pool->cmd_buffers); 6316bf215546Sopenharmony_ci list_inithead(&pool->free_cmd_buffers); 6317bf215546Sopenharmony_ci 6318bf215546Sopenharmony_ci *pCmdPool = radv_cmd_pool_to_handle(pool); 6319bf215546Sopenharmony_ci 6320bf215546Sopenharmony_ci return VK_SUCCESS; 6321bf215546Sopenharmony_ci} 6322bf215546Sopenharmony_ci 6323bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 6324bf215546Sopenharmony_ciradv_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool, 6325bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 6326bf215546Sopenharmony_ci{ 6327bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 6328bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 6329bf215546Sopenharmony_ci 6330bf215546Sopenharmony_ci if (!pool) 6331bf215546Sopenharmony_ci return; 6332bf215546Sopenharmony_ci 6333bf215546Sopenharmony_ci list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) 6334bf215546Sopenharmony_ci { 6335bf215546Sopenharmony_ci radv_destroy_cmd_buffer(cmd_buffer); 6336bf215546Sopenharmony_ci } 6337bf215546Sopenharmony_ci 6338bf215546Sopenharmony_ci list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link) 6339bf215546Sopenharmony_ci { 6340bf215546Sopenharmony_ci radv_destroy_cmd_buffer(cmd_buffer); 6341bf215546Sopenharmony_ci } 6342bf215546Sopenharmony_ci 6343bf215546Sopenharmony_ci vk_command_pool_finish(&pool->vk); 6344bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, pAllocator, pool); 6345bf215546Sopenharmony_ci} 6346bf215546Sopenharmony_ci 6347bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 6348bf215546Sopenharmony_ciradv_ResetCommandPool(VkDevice device, VkCommandPool commandPool, 
VkCommandPoolResetFlags flags) 6349bf215546Sopenharmony_ci{ 6350bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 6351bf215546Sopenharmony_ci VkResult result; 6352bf215546Sopenharmony_ci 6353bf215546Sopenharmony_ci list_for_each_entry(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link) 6354bf215546Sopenharmony_ci { 6355bf215546Sopenharmony_ci result = radv_reset_cmd_buffer(cmd_buffer); 6356bf215546Sopenharmony_ci if (result != VK_SUCCESS) 6357bf215546Sopenharmony_ci return result; 6358bf215546Sopenharmony_ci } 6359bf215546Sopenharmony_ci 6360bf215546Sopenharmony_ci return VK_SUCCESS; 6361bf215546Sopenharmony_ci} 6362bf215546Sopenharmony_ci 6363bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 6364bf215546Sopenharmony_ciradv_TrimCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlags flags) 6365bf215546Sopenharmony_ci{ 6366bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool); 6367bf215546Sopenharmony_ci 6368bf215546Sopenharmony_ci list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link) 6369bf215546Sopenharmony_ci { 6370bf215546Sopenharmony_ci radv_destroy_cmd_buffer(cmd_buffer); 6371bf215546Sopenharmony_ci } 6372bf215546Sopenharmony_ci} 6373bf215546Sopenharmony_ci 6374bf215546Sopenharmony_cistatic void 6375bf215546Sopenharmony_ciradv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, uint32_t subpass_id) 6376bf215546Sopenharmony_ci{ 6377bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6378bf215546Sopenharmony_ci struct radv_subpass *subpass = &state->pass->subpasses[subpass_id]; 6379bf215546Sopenharmony_ci 6380bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096); 6381bf215546Sopenharmony_ci 6382bf215546Sopenharmony_ci radv_emit_subpass_barrier(cmd_buffer, &subpass->start_barrier); 6383bf215546Sopenharmony_ci 6384bf215546Sopenharmony_ci 
radv_cmd_buffer_set_subpass(cmd_buffer, subpass); 6385bf215546Sopenharmony_ci 6386bf215546Sopenharmony_ci radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC); 6387bf215546Sopenharmony_ci 6388bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->attachment_count; ++i) { 6389bf215546Sopenharmony_ci const uint32_t a = subpass->attachments[i].attachment; 6390bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 6391bf215546Sopenharmony_ci continue; 6392bf215546Sopenharmony_ci 6393bf215546Sopenharmony_ci radv_handle_subpass_image_transition(cmd_buffer, subpass->attachments[i], true); 6394bf215546Sopenharmony_ci } 6395bf215546Sopenharmony_ci 6396bf215546Sopenharmony_ci radv_ace_internal_barrier(cmd_buffer, 0, 0); 6397bf215546Sopenharmony_ci radv_describe_barrier_end(cmd_buffer); 6398bf215546Sopenharmony_ci 6399bf215546Sopenharmony_ci radv_cmd_buffer_clear_subpass(cmd_buffer); 6400bf215546Sopenharmony_ci 6401bf215546Sopenharmony_ci if (subpass->vrs_attachment) { 6402bf215546Sopenharmony_ci int idx = subpass->vrs_attachment->attachment; 6403bf215546Sopenharmony_ci struct radv_image_view *vrs_iview = cmd_buffer->state.attachments[idx].iview; 6404bf215546Sopenharmony_ci 6405bf215546Sopenharmony_ci if (subpass->depth_stencil_attachment) { 6406bf215546Sopenharmony_ci /* When a subpass uses a VRS attachment and a depth/stencil attachment, we just need to 6407bf215546Sopenharmony_ci * copy the VRS rates to the HTILE buffer of the attachment. 
6408bf215546Sopenharmony_ci */ 6409bf215546Sopenharmony_ci int ds_idx = subpass->depth_stencil_attachment->attachment; 6410bf215546Sopenharmony_ci struct radv_image_view *ds_iview = cmd_buffer->state.attachments[ds_idx].iview; 6411bf215546Sopenharmony_ci struct radv_image *ds_image = ds_iview->image; 6412bf215546Sopenharmony_ci uint32_t level = ds_iview->vk.base_mip_level; 6413bf215546Sopenharmony_ci 6414bf215546Sopenharmony_ci VkExtent2D extent = { 6415bf215546Sopenharmony_ci .width = radv_minify(ds_image->info.width, level), 6416bf215546Sopenharmony_ci .height = radv_minify(ds_image->info.height, level), 6417bf215546Sopenharmony_ci }; 6418bf215546Sopenharmony_ci 6419bf215546Sopenharmony_ci /* HTILE buffer */ 6420bf215546Sopenharmony_ci uint64_t htile_offset = ds_image->bindings[0].offset + ds_image->planes[0].surface.meta_offset + 6421bf215546Sopenharmony_ci ds_image->planes[0].surface.u.gfx9.meta_levels[level].offset; 6422bf215546Sopenharmony_ci uint64_t htile_size = ds_image->planes[0].surface.u.gfx9.meta_levels[level].size; 6423bf215546Sopenharmony_ci struct radv_buffer htile_buffer; 6424bf215546Sopenharmony_ci 6425bf215546Sopenharmony_ci radv_buffer_init(&htile_buffer, cmd_buffer->device, ds_image->bindings[0].bo, htile_size, htile_offset); 6426bf215546Sopenharmony_ci 6427bf215546Sopenharmony_ci /* Copy the VRS rates to the HTILE buffer. */ 6428bf215546Sopenharmony_ci radv_copy_vrs_htile(cmd_buffer, vrs_iview->image, &extent, ds_image, &htile_buffer, true); 6429bf215546Sopenharmony_ci 6430bf215546Sopenharmony_ci radv_buffer_finish(&htile_buffer); 6431bf215546Sopenharmony_ci } else { 6432bf215546Sopenharmony_ci /* When a subpass uses a VRS attachment without binding a depth/stencil attachment, we have 6433bf215546Sopenharmony_ci * to copy the VRS rates to our internal HTILE buffer. 
6434bf215546Sopenharmony_ci */ 6435bf215546Sopenharmony_ci struct vk_framebuffer *fb = cmd_buffer->state.framebuffer; 6436bf215546Sopenharmony_ci struct radv_image *ds_image = radv_cmd_buffer_get_vrs_image(cmd_buffer); 6437bf215546Sopenharmony_ci 6438bf215546Sopenharmony_ci if (ds_image) { 6439bf215546Sopenharmony_ci /* HTILE buffer */ 6440bf215546Sopenharmony_ci struct radv_buffer *htile_buffer = cmd_buffer->device->vrs.buffer; 6441bf215546Sopenharmony_ci 6442bf215546Sopenharmony_ci VkExtent2D extent = { 6443bf215546Sopenharmony_ci .width = MIN2(fb->width, ds_image->info.width), 6444bf215546Sopenharmony_ci .height = MIN2(fb->height, ds_image->info.height), 6445bf215546Sopenharmony_ci }; 6446bf215546Sopenharmony_ci 6447bf215546Sopenharmony_ci /* Copy the VRS rates to the HTILE buffer. */ 6448bf215546Sopenharmony_ci radv_copy_vrs_htile(cmd_buffer, vrs_iview->image, &extent, ds_image, htile_buffer, false); 6449bf215546Sopenharmony_ci } 6450bf215546Sopenharmony_ci } 6451bf215546Sopenharmony_ci } 6452bf215546Sopenharmony_ci 6453bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 6454bf215546Sopenharmony_ci} 6455bf215546Sopenharmony_ci 6456bf215546Sopenharmony_cistatic void 6457bf215546Sopenharmony_ciradv_mark_noncoherent_rb(struct radv_cmd_buffer *cmd_buffer) 6458bf215546Sopenharmony_ci{ 6459bf215546Sopenharmony_ci const struct radv_subpass *subpass = cmd_buffer->state.subpass; 6460bf215546Sopenharmony_ci 6461bf215546Sopenharmony_ci /* Have to be conservative in cmdbuffers with inherited attachments. 
*/ 6462bf215546Sopenharmony_ci if (!cmd_buffer->state.attachments) { 6463bf215546Sopenharmony_ci cmd_buffer->state.rb_noncoherent_dirty = true; 6464bf215546Sopenharmony_ci return; 6465bf215546Sopenharmony_ci } 6466bf215546Sopenharmony_ci 6467bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; ++i) { 6468bf215546Sopenharmony_ci const uint32_t a = subpass->color_attachments[i].attachment; 6469bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 6470bf215546Sopenharmony_ci continue; 6471bf215546Sopenharmony_ci if (!cmd_buffer->state.attachments[a].iview->image->l2_coherent) { 6472bf215546Sopenharmony_ci cmd_buffer->state.rb_noncoherent_dirty = true; 6473bf215546Sopenharmony_ci return; 6474bf215546Sopenharmony_ci } 6475bf215546Sopenharmony_ci } 6476bf215546Sopenharmony_ci if (subpass->depth_stencil_attachment && 6477bf215546Sopenharmony_ci !cmd_buffer->state.attachments[subpass->depth_stencil_attachment->attachment] 6478bf215546Sopenharmony_ci .iview->image->l2_coherent) 6479bf215546Sopenharmony_ci cmd_buffer->state.rb_noncoherent_dirty = true; 6480bf215546Sopenharmony_ci} 6481bf215546Sopenharmony_ci 6482bf215546Sopenharmony_civoid 6483bf215546Sopenharmony_ciradv_cmd_buffer_restore_subpass(struct radv_cmd_buffer *cmd_buffer, 6484bf215546Sopenharmony_ci const struct radv_subpass *subpass) 6485bf215546Sopenharmony_ci{ 6486bf215546Sopenharmony_ci radv_mark_noncoherent_rb(cmd_buffer); 6487bf215546Sopenharmony_ci radv_cmd_buffer_set_subpass(cmd_buffer, subpass); 6488bf215546Sopenharmony_ci} 6489bf215546Sopenharmony_ci 6490bf215546Sopenharmony_cistatic void 6491bf215546Sopenharmony_ciradv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer) 6492bf215546Sopenharmony_ci{ 6493bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6494bf215546Sopenharmony_ci const struct radv_subpass *subpass = state->subpass; 6495bf215546Sopenharmony_ci uint32_t subpass_id = radv_get_subpass_id(cmd_buffer); 6496bf215546Sopenharmony_ci 
6497bf215546Sopenharmony_ci radv_cmd_buffer_resolve_subpass(cmd_buffer); 6498bf215546Sopenharmony_ci 6499bf215546Sopenharmony_ci radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC); 6500bf215546Sopenharmony_ci 6501bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->attachment_count; ++i) { 6502bf215546Sopenharmony_ci const uint32_t a = subpass->attachments[i].attachment; 6503bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 6504bf215546Sopenharmony_ci continue; 6505bf215546Sopenharmony_ci 6506bf215546Sopenharmony_ci if (state->pass->attachments[a].last_subpass_idx != subpass_id) 6507bf215546Sopenharmony_ci continue; 6508bf215546Sopenharmony_ci 6509bf215546Sopenharmony_ci VkImageLayout layout = state->pass->attachments[a].final_layout; 6510bf215546Sopenharmony_ci VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout; 6511bf215546Sopenharmony_ci struct radv_subpass_attachment att = {a, layout, stencil_layout}; 6512bf215546Sopenharmony_ci radv_handle_subpass_image_transition(cmd_buffer, att, false); 6513bf215546Sopenharmony_ci } 6514bf215546Sopenharmony_ci 6515bf215546Sopenharmony_ci radv_ace_internal_barrier(cmd_buffer, 0, 0); 6516bf215546Sopenharmony_ci radv_describe_barrier_end(cmd_buffer); 6517bf215546Sopenharmony_ci} 6518bf215546Sopenharmony_ci 6519bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 6520bf215546Sopenharmony_ciradv_CmdBeginRenderPass2(VkCommandBuffer commandBuffer, 6521bf215546Sopenharmony_ci const VkRenderPassBeginInfo *pRenderPassBeginInfo, 6522bf215546Sopenharmony_ci const VkSubpassBeginInfo *pSubpassBeginInfo) 6523bf215546Sopenharmony_ci{ 6524bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 6525bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBeginInfo->renderPass); 6526bf215546Sopenharmony_ci RADV_FROM_HANDLE(vk_framebuffer, framebuffer, pRenderPassBeginInfo->framebuffer); 6527bf215546Sopenharmony_ci VkResult result; 
6528bf215546Sopenharmony_ci 6529bf215546Sopenharmony_ci cmd_buffer->state.framebuffer = framebuffer; 6530bf215546Sopenharmony_ci cmd_buffer->state.pass = pass; 6531bf215546Sopenharmony_ci cmd_buffer->state.render_area = pRenderPassBeginInfo->renderArea; 6532bf215546Sopenharmony_ci 6533bf215546Sopenharmony_ci result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBeginInfo); 6534bf215546Sopenharmony_ci if (result != VK_SUCCESS) 6535bf215546Sopenharmony_ci return; 6536bf215546Sopenharmony_ci 6537bf215546Sopenharmony_ci result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBeginInfo); 6538bf215546Sopenharmony_ci if (result != VK_SUCCESS) 6539bf215546Sopenharmony_ci return; 6540bf215546Sopenharmony_ci 6541bf215546Sopenharmony_ci radv_cmd_buffer_begin_subpass(cmd_buffer, 0); 6542bf215546Sopenharmony_ci} 6543bf215546Sopenharmony_ci 6544bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 6545bf215546Sopenharmony_ciradv_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo, 6546bf215546Sopenharmony_ci const VkSubpassEndInfo *pSubpassEndInfo) 6547bf215546Sopenharmony_ci{ 6548bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 6549bf215546Sopenharmony_ci 6550bf215546Sopenharmony_ci radv_mark_noncoherent_rb(cmd_buffer); 6551bf215546Sopenharmony_ci 6552bf215546Sopenharmony_ci uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer); 6553bf215546Sopenharmony_ci radv_cmd_buffer_end_subpass(cmd_buffer); 6554bf215546Sopenharmony_ci radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1); 6555bf215546Sopenharmony_ci} 6556bf215546Sopenharmony_ci 6557bf215546Sopenharmony_cistatic void 6558bf215546Sopenharmony_ciradv_emit_view_index_per_stage(struct radeon_cmdbuf *cs, struct radv_graphics_pipeline *pipeline, 6559bf215546Sopenharmony_ci unsigned stage, unsigned index) 6560bf215546Sopenharmony_ci{ 6561bf215546Sopenharmony_ci struct radv_userdata_info *loc = 
radv_lookup_user_sgpr(&pipeline->base, stage, AC_UD_VIEW_INDEX); 6562bf215546Sopenharmony_ci if (loc->sgpr_idx == -1) 6563bf215546Sopenharmony_ci return; 6564bf215546Sopenharmony_ci uint32_t base_reg = pipeline->base.user_data_0[stage]; 6565bf215546Sopenharmony_ci radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4, index); 6566bf215546Sopenharmony_ci} 6567bf215546Sopenharmony_ci 6568bf215546Sopenharmony_cistatic void 6569bf215546Sopenharmony_ciradv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index) 6570bf215546Sopenharmony_ci{ 6571bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 6572bf215546Sopenharmony_ci 6573bf215546Sopenharmony_ci radv_foreach_stage(stage, pipeline->active_stages & ~VK_SHADER_STAGE_TASK_BIT_NV) { 6574bf215546Sopenharmony_ci radv_emit_view_index_per_stage(cmd_buffer->cs, pipeline, stage, index); 6575bf215546Sopenharmony_ci } 6576bf215546Sopenharmony_ci if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) { 6577bf215546Sopenharmony_ci struct radv_userdata_info *loc = 6578bf215546Sopenharmony_ci &pipeline->base.gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX]; 6579bf215546Sopenharmony_ci if (loc->sgpr_idx != -1) { 6580bf215546Sopenharmony_ci uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0; 6581bf215546Sopenharmony_ci radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index); 6582bf215546Sopenharmony_ci } 6583bf215546Sopenharmony_ci } 6584bf215546Sopenharmony_ci if (pipeline->active_stages & VK_SHADER_STAGE_TASK_BIT_NV) { 6585bf215546Sopenharmony_ci radv_emit_view_index_per_stage(cmd_buffer->ace_internal.cs, pipeline, MESA_SHADER_TASK, 6586bf215546Sopenharmony_ci index); 6587bf215546Sopenharmony_ci } 6588bf215546Sopenharmony_ci} 6589bf215546Sopenharmony_ci 6590bf215546Sopenharmony_ci/** 6591bf215546Sopenharmony_ci * Emulates predication for MEC using COND_EXEC. 
6592bf215546Sopenharmony_ci * When the current command buffer is predicating, emit a COND_EXEC packet 6593bf215546Sopenharmony_ci * so that the MEC skips the next few dwords worth of packets. 6594bf215546Sopenharmony_ci * 6595bf215546Sopenharmony_ci * To make it work with inverted conditional rendering, we allocate 6596bf215546Sopenharmony_ci * space in the upload BO and emit some packets to invert the condition. 6597bf215546Sopenharmony_ci */ 6598bf215546Sopenharmony_cistatic void 6599bf215546Sopenharmony_ciradv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs, 6600bf215546Sopenharmony_ci uint64_t inv_va, bool *inv_emitted, unsigned dwords) 6601bf215546Sopenharmony_ci{ 6602bf215546Sopenharmony_ci if (!state->predicating) 6603bf215546Sopenharmony_ci return; 6604bf215546Sopenharmony_ci 6605bf215546Sopenharmony_ci uint64_t va = state->predication_va; 6606bf215546Sopenharmony_ci 6607bf215546Sopenharmony_ci if (!state->predication_type) { 6608bf215546Sopenharmony_ci /* Invert the condition the first time it is needed. */ 6609bf215546Sopenharmony_ci if (!*inv_emitted) { 6610bf215546Sopenharmony_ci *inv_emitted = true; 6611bf215546Sopenharmony_ci 6612bf215546Sopenharmony_ci /* Write 1 to the inverted predication VA. */ 6613bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 6614bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 6615bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 6616bf215546Sopenharmony_ci radeon_emit(cs, 1); 6617bf215546Sopenharmony_ci radeon_emit(cs, 0); 6618bf215546Sopenharmony_ci radeon_emit(cs, inv_va); 6619bf215546Sopenharmony_ci radeon_emit(cs, inv_va >> 32); 6620bf215546Sopenharmony_ci 6621bf215546Sopenharmony_ci /* If the API predication VA == 0, skip next command. 
*/ 6622bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); 6623bf215546Sopenharmony_ci radeon_emit(cs, va); 6624bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 6625bf215546Sopenharmony_ci radeon_emit(cs, 0); 6626bf215546Sopenharmony_ci radeon_emit(cs, 6); /* 1x COPY_DATA size */ 6627bf215546Sopenharmony_ci 6628bf215546Sopenharmony_ci /* Write 0 to the new predication VA (when the API condition != 0) */ 6629bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 6630bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 6631bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 6632bf215546Sopenharmony_ci radeon_emit(cs, 0); 6633bf215546Sopenharmony_ci radeon_emit(cs, 0); 6634bf215546Sopenharmony_ci radeon_emit(cs, inv_va); 6635bf215546Sopenharmony_ci radeon_emit(cs, inv_va >> 32); 6636bf215546Sopenharmony_ci } 6637bf215546Sopenharmony_ci 6638bf215546Sopenharmony_ci va = inv_va; 6639bf215546Sopenharmony_ci } 6640bf215546Sopenharmony_ci 6641bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0)); 6642bf215546Sopenharmony_ci radeon_emit(cs, va); 6643bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 6644bf215546Sopenharmony_ci radeon_emit(cs, 0); /* Cache policy */ 6645bf215546Sopenharmony_ci radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. 
*/ 6646bf215546Sopenharmony_ci} 6647bf215546Sopenharmony_ci 6648bf215546Sopenharmony_cistatic void 6649bf215546Sopenharmony_ciradv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count, 6650bf215546Sopenharmony_ci uint32_t use_opaque) 6651bf215546Sopenharmony_ci{ 6652bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating)); 6653bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, vertex_count); 6654bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque); 6655bf215546Sopenharmony_ci} 6656bf215546Sopenharmony_ci 6657bf215546Sopenharmony_ci/** 6658bf215546Sopenharmony_ci * Emit a PKT3_DRAW_INDEX_2 packet to render "index_count` vertices. 6659bf215546Sopenharmony_ci * 6660bf215546Sopenharmony_ci * The starting address "index_va" may point anywhere within the index buffer. The number of 6661bf215546Sopenharmony_ci * indexes allocated in the index buffer *past that point* is specified by "max_index_count". 6662bf215546Sopenharmony_ci * Hardware uses this information to return 0 for out-of-bounds reads. 
6663bf215546Sopenharmony_ci */ 6664bf215546Sopenharmony_cistatic void 6665bf215546Sopenharmony_ciradv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t index_va, 6666bf215546Sopenharmony_ci uint32_t max_index_count, uint32_t index_count, bool not_eop) 6667bf215546Sopenharmony_ci{ 6668bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating)); 6669bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, max_index_count); 6670bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, index_va); 6671bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, index_va >> 32); 6672bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, index_count); 6673bf215546Sopenharmony_ci /* NOT_EOP allows merging multiple draws into 1 wave, but only user VGPRs 6674bf215546Sopenharmony_ci * can be changed between draws and GS fast launch must be disabled. 6675bf215546Sopenharmony_ci * NOT_EOP doesn't work on gfx9 and older. 6676bf215546Sopenharmony_ci */ 6677bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA | S_0287F0_NOT_EOP(not_eop)); 6678bf215546Sopenharmony_ci} 6679bf215546Sopenharmony_ci 6680bf215546Sopenharmony_ci/* MUST inline this function to avoid massive perf loss in drawoverhead */ 6681bf215546Sopenharmony_ciALWAYS_INLINE static void 6682bf215546Sopenharmony_ciradv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed, 6683bf215546Sopenharmony_ci uint32_t draw_count, uint64_t count_va, uint32_t stride) 6684bf215546Sopenharmony_ci{ 6685bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 6686bf215546Sopenharmony_ci const unsigned di_src_sel = indexed ? 
V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX; 6687bf215546Sopenharmony_ci bool draw_id_enable = cmd_buffer->state.graphics_pipeline->uses_drawid; 6688bf215546Sopenharmony_ci uint32_t base_reg = cmd_buffer->state.graphics_pipeline->vtx_base_sgpr; 6689bf215546Sopenharmony_ci uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0; 6690bf215546Sopenharmony_ci bool predicating = cmd_buffer->state.predicating; 6691bf215546Sopenharmony_ci bool mesh = cmd_buffer->state.mesh_shading; 6692bf215546Sopenharmony_ci assert(base_reg); 6693bf215546Sopenharmony_ci 6694bf215546Sopenharmony_ci /* just reset draw state for vertex data */ 6695bf215546Sopenharmony_ci cmd_buffer->state.last_first_instance = -1; 6696bf215546Sopenharmony_ci cmd_buffer->state.last_num_instances = -1; 6697bf215546Sopenharmony_ci cmd_buffer->state.last_drawid = -1; 6698bf215546Sopenharmony_ci cmd_buffer->state.last_vertex_offset = -1; 6699bf215546Sopenharmony_ci 6700bf215546Sopenharmony_ci vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2; 6701bf215546Sopenharmony_ci if (cmd_buffer->state.graphics_pipeline->uses_baseinstance) 6702bf215546Sopenharmony_ci start_instance_reg = ((base_reg + (draw_id_enable ? 8 : 4)) - SI_SH_REG_OFFSET) >> 2; 6703bf215546Sopenharmony_ci if (draw_id_enable) 6704bf215546Sopenharmony_ci draw_id_reg = ((base_reg + mesh * 12 + 4) - SI_SH_REG_OFFSET) >> 2; 6705bf215546Sopenharmony_ci 6706bf215546Sopenharmony_ci if (draw_count == 1 && !count_va && !draw_id_enable) { 6707bf215546Sopenharmony_ci radeon_emit(cs, 6708bf215546Sopenharmony_ci PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating)); 6709bf215546Sopenharmony_ci radeon_emit(cs, 0); 6710bf215546Sopenharmony_ci radeon_emit(cs, vertex_offset_reg); 6711bf215546Sopenharmony_ci radeon_emit(cs, start_instance_reg); 6712bf215546Sopenharmony_ci radeon_emit(cs, di_src_sel); 6713bf215546Sopenharmony_ci } else { 6714bf215546Sopenharmony_ci radeon_emit(cs, PKT3(indexed ? 
PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8, 6715bf215546Sopenharmony_ci predicating)); 6716bf215546Sopenharmony_ci radeon_emit(cs, 0); 6717bf215546Sopenharmony_ci radeon_emit(cs, vertex_offset_reg); 6718bf215546Sopenharmony_ci radeon_emit(cs, start_instance_reg); 6719bf215546Sopenharmony_ci radeon_emit(cs, draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) | 6720bf215546Sopenharmony_ci S_2C3_COUNT_INDIRECT_ENABLE(!!count_va)); 6721bf215546Sopenharmony_ci radeon_emit(cs, draw_count); /* count */ 6722bf215546Sopenharmony_ci radeon_emit(cs, count_va); /* count_addr */ 6723bf215546Sopenharmony_ci radeon_emit(cs, count_va >> 32); 6724bf215546Sopenharmony_ci radeon_emit(cs, stride); /* stride */ 6725bf215546Sopenharmony_ci radeon_emit(cs, di_src_sel); 6726bf215546Sopenharmony_ci 6727bf215546Sopenharmony_ci cmd_buffer->state.uses_draw_indirect_multi = true; 6728bf215546Sopenharmony_ci } 6729bf215546Sopenharmony_ci} 6730bf215546Sopenharmony_ci 6731bf215546Sopenharmony_ciALWAYS_INLINE static void 6732bf215546Sopenharmony_ciradv_cs_emit_dispatch_taskmesh_direct_ace_packet(struct radv_cmd_buffer *cmd_buffer, 6733bf215546Sopenharmony_ci const uint32_t x, const uint32_t y, 6734bf215546Sopenharmony_ci const uint32_t z) 6735bf215546Sopenharmony_ci{ 6736bf215546Sopenharmony_ci struct radv_pipeline *pipeline = &cmd_buffer->state.graphics_pipeline->base; 6737bf215546Sopenharmony_ci struct radv_shader *compute_shader = radv_get_shader(pipeline, MESA_SHADER_TASK); 6738bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs; 6739bf215546Sopenharmony_ci const bool predicating = cmd_buffer->state.predicating; 6740bf215546Sopenharmony_ci const uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator_task | 6741bf215546Sopenharmony_ci S_00B800_CS_W32_EN(compute_shader->info.wave_size == 32); 6742bf215546Sopenharmony_ci 6743bf215546Sopenharmony_ci struct radv_userdata_info *ring_entry_loc = 6744bf215546Sopenharmony_ci 
radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_TASK_RING_ENTRY); 6745bf215546Sopenharmony_ci assert(ring_entry_loc && ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1); 6746bf215546Sopenharmony_ci 6747bf215546Sopenharmony_ci uint32_t ring_entry_reg = 6748bf215546Sopenharmony_ci (R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; 6749bf215546Sopenharmony_ci 6750bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_DIRECT_ACE, 4, predicating) | PKT3_SHADER_TYPE_S(1)); 6751bf215546Sopenharmony_ci radeon_emit(cs, x); 6752bf215546Sopenharmony_ci radeon_emit(cs, y); 6753bf215546Sopenharmony_ci radeon_emit(cs, z); 6754bf215546Sopenharmony_ci radeon_emit(cs, dispatch_initiator); 6755bf215546Sopenharmony_ci radeon_emit(cs, ring_entry_reg & 0xFFFF); 6756bf215546Sopenharmony_ci} 6757bf215546Sopenharmony_ci 6758bf215546Sopenharmony_ciALWAYS_INLINE static void 6759bf215546Sopenharmony_ciradv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(struct radv_cmd_buffer *cmd_buffer, 6760bf215546Sopenharmony_ci uint64_t data_va, uint32_t draw_count, 6761bf215546Sopenharmony_ci uint64_t count_va, uint32_t stride) 6762bf215546Sopenharmony_ci{ 6763bf215546Sopenharmony_ci assert((data_va & 0x03) == 0); 6764bf215546Sopenharmony_ci assert((count_va & 0x03) == 0); 6765bf215546Sopenharmony_ci 6766bf215546Sopenharmony_ci struct radv_pipeline *pipeline = &cmd_buffer->state.graphics_pipeline->base; 6767bf215546Sopenharmony_ci struct radv_shader *compute_shader = radv_get_shader(pipeline, MESA_SHADER_TASK); 6768bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs; 6769bf215546Sopenharmony_ci 6770bf215546Sopenharmony_ci const uint32_t count_indirect_enable = !!count_va; 6771bf215546Sopenharmony_ci const uint32_t xyz_dim_enable = compute_shader->info.cs.uses_grid_size; 6772bf215546Sopenharmony_ci const uint32_t draw_id_enable = compute_shader->info.vs.needs_draw_id; 6773bf215546Sopenharmony_ci const 
uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator_task | 6774bf215546Sopenharmony_ci S_00B800_CS_W32_EN(compute_shader->info.wave_size == 32); 6775bf215546Sopenharmony_ci 6776bf215546Sopenharmony_ci const struct radv_userdata_info *ring_entry_loc = 6777bf215546Sopenharmony_ci radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_TASK_RING_ENTRY); 6778bf215546Sopenharmony_ci const struct radv_userdata_info *xyz_dim_loc = 6779bf215546Sopenharmony_ci radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_GRID_SIZE); 6780bf215546Sopenharmony_ci const struct radv_userdata_info *draw_id_loc = 6781bf215546Sopenharmony_ci radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_TASK_DRAW_ID); 6782bf215546Sopenharmony_ci 6783bf215546Sopenharmony_ci assert(ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1); 6784bf215546Sopenharmony_ci assert(!xyz_dim_enable || (xyz_dim_loc->sgpr_idx != -1 && xyz_dim_loc->num_sgprs == 3)); 6785bf215546Sopenharmony_ci assert(!draw_id_enable || (draw_id_loc->sgpr_idx != -1 && draw_id_loc->num_sgprs == 1)); 6786bf215546Sopenharmony_ci 6787bf215546Sopenharmony_ci const uint32_t ring_entry_reg = 6788bf215546Sopenharmony_ci (R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; 6789bf215546Sopenharmony_ci const uint32_t xyz_dim_reg = 6790bf215546Sopenharmony_ci !xyz_dim_enable 6791bf215546Sopenharmony_ci ? 0 6792bf215546Sopenharmony_ci : (R_00B900_COMPUTE_USER_DATA_0 + xyz_dim_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; 6793bf215546Sopenharmony_ci const uint32_t draw_id_reg = 6794bf215546Sopenharmony_ci !draw_id_enable 6795bf215546Sopenharmony_ci ? 
0 6796bf215546Sopenharmony_ci : (R_00B900_COMPUTE_USER_DATA_0 + draw_id_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2; 6797bf215546Sopenharmony_ci 6798bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_INDIRECT_MULTI_ACE, 9, 0) | PKT3_SHADER_TYPE_S(1)); 6799bf215546Sopenharmony_ci radeon_emit(cs, data_va); 6800bf215546Sopenharmony_ci radeon_emit(cs, data_va >> 32); 6801bf215546Sopenharmony_ci radeon_emit(cs, ring_entry_reg & 0xFFFF); 6802bf215546Sopenharmony_ci radeon_emit(cs, (count_indirect_enable << 1) | (draw_id_enable << 2) | (xyz_dim_enable << 3) | 6803bf215546Sopenharmony_ci (draw_id_reg << 16)); 6804bf215546Sopenharmony_ci radeon_emit(cs, xyz_dim_reg & 0xFFFF); 6805bf215546Sopenharmony_ci radeon_emit(cs, draw_count); 6806bf215546Sopenharmony_ci radeon_emit(cs, count_va); 6807bf215546Sopenharmony_ci radeon_emit(cs, count_va >> 32); 6808bf215546Sopenharmony_ci radeon_emit(cs, stride); 6809bf215546Sopenharmony_ci radeon_emit(cs, dispatch_initiator); 6810bf215546Sopenharmony_ci} 6811bf215546Sopenharmony_ci 6812bf215546Sopenharmony_ciALWAYS_INLINE static void 6813bf215546Sopenharmony_ciradv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer) 6814bf215546Sopenharmony_ci{ 6815bf215546Sopenharmony_ci struct radv_pipeline *pipeline = &cmd_buffer->state.graphics_pipeline->base; 6816bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 6817bf215546Sopenharmony_ci bool predicating = cmd_buffer->state.predicating; 6818bf215546Sopenharmony_ci 6819bf215546Sopenharmony_ci struct radv_userdata_info *ring_entry_loc = 6820bf215546Sopenharmony_ci radv_lookup_user_sgpr(pipeline, MESA_SHADER_MESH, AC_UD_TASK_RING_ENTRY); 6821bf215546Sopenharmony_ci 6822bf215546Sopenharmony_ci assert(ring_entry_loc && ring_entry_loc->sgpr_idx != -1); 6823bf215546Sopenharmony_ci 6824bf215546Sopenharmony_ci uint32_t base_reg = cmd_buffer->state.graphics_pipeline->vtx_base_sgpr; 6825bf215546Sopenharmony_ci uint32_t xyz_dim_reg = ((base_reg + 4) - 
SI_SH_REG_OFFSET) >> 2; 6826bf215546Sopenharmony_ci uint32_t ring_entry_reg = ((base_reg + ring_entry_loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2; 6827bf215546Sopenharmony_ci 6828bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating)); 6829bf215546Sopenharmony_ci radeon_emit(cs, (ring_entry_reg << 16) | (xyz_dim_reg & 0xFFFF)); 6830bf215546Sopenharmony_ci radeon_emit(cs, 0); 6831bf215546Sopenharmony_ci radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX); 6832bf215546Sopenharmony_ci} 6833bf215546Sopenharmony_ci 6834bf215546Sopenharmony_cistatic inline void 6835bf215546Sopenharmony_ciradv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer, 6836bf215546Sopenharmony_ci const struct radv_draw_info *info, const uint32_t vertex_offset) 6837bf215546Sopenharmony_ci{ 6838bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6839bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 6840bf215546Sopenharmony_ci const bool uses_baseinstance = state->graphics_pipeline->uses_baseinstance; 6841bf215546Sopenharmony_ci const bool uses_drawid = state->graphics_pipeline->uses_drawid; 6842bf215546Sopenharmony_ci 6843bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, state->graphics_pipeline->vtx_base_sgpr, state->graphics_pipeline->vtx_emit_num); 6844bf215546Sopenharmony_ci 6845bf215546Sopenharmony_ci radeon_emit(cs, vertex_offset); 6846bf215546Sopenharmony_ci state->last_vertex_offset = vertex_offset; 6847bf215546Sopenharmony_ci if (uses_drawid) { 6848bf215546Sopenharmony_ci radeon_emit(cs, 0); 6849bf215546Sopenharmony_ci state->last_drawid = 0; 6850bf215546Sopenharmony_ci } 6851bf215546Sopenharmony_ci if (uses_baseinstance) { 6852bf215546Sopenharmony_ci radeon_emit(cs, info->first_instance); 6853bf215546Sopenharmony_ci state->last_first_instance = info->first_instance; 6854bf215546Sopenharmony_ci } 6855bf215546Sopenharmony_ci} 6856bf215546Sopenharmony_ci 6857bf215546Sopenharmony_ciALWAYS_INLINE static void 
6858bf215546Sopenharmony_ciradv_emit_userdata_vertex(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, 6859bf215546Sopenharmony_ci const uint32_t vertex_offset) 6860bf215546Sopenharmony_ci{ 6861bf215546Sopenharmony_ci const struct radv_cmd_state *state = &cmd_buffer->state; 6862bf215546Sopenharmony_ci const bool uses_baseinstance = state->graphics_pipeline->uses_baseinstance; 6863bf215546Sopenharmony_ci const bool uses_drawid = state->graphics_pipeline->uses_drawid; 6864bf215546Sopenharmony_ci 6865bf215546Sopenharmony_ci /* this looks very dumb, but it allows the compiler to optimize better and yields 6866bf215546Sopenharmony_ci * ~3-4% perf increase in drawoverhead 6867bf215546Sopenharmony_ci */ 6868bf215546Sopenharmony_ci if (vertex_offset != state->last_vertex_offset) { 6869bf215546Sopenharmony_ci radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset); 6870bf215546Sopenharmony_ci } else if (uses_drawid && 0 != state->last_drawid) { 6871bf215546Sopenharmony_ci radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset); 6872bf215546Sopenharmony_ci } else if (uses_baseinstance && info->first_instance != state->last_first_instance) { 6873bf215546Sopenharmony_ci radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset); 6874bf215546Sopenharmony_ci } 6875bf215546Sopenharmony_ci} 6876bf215546Sopenharmony_ci 6877bf215546Sopenharmony_ciALWAYS_INLINE static void 6878bf215546Sopenharmony_ciradv_emit_userdata_vertex_drawid(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_offset, uint32_t drawid) 6879bf215546Sopenharmony_ci{ 6880bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6881bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 6882bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, state->graphics_pipeline->vtx_base_sgpr, 1 + !!drawid); 6883bf215546Sopenharmony_ci radeon_emit(cs, vertex_offset); 6884bf215546Sopenharmony_ci state->last_vertex_offset = vertex_offset; 
6885bf215546Sopenharmony_ci if (drawid) 6886bf215546Sopenharmony_ci radeon_emit(cs, drawid); 6887bf215546Sopenharmony_ci 6888bf215546Sopenharmony_ci} 6889bf215546Sopenharmony_ci 6890bf215546Sopenharmony_ciALWAYS_INLINE static void 6891bf215546Sopenharmony_ciradv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer, 6892bf215546Sopenharmony_ci const uint32_t x, const uint32_t y, const uint32_t z, 6893bf215546Sopenharmony_ci const uint32_t first_task) 6894bf215546Sopenharmony_ci{ 6895bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6896bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 6897bf215546Sopenharmony_ci const bool uses_drawid = state->graphics_pipeline->uses_drawid; 6898bf215546Sopenharmony_ci 6899bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, state->graphics_pipeline->vtx_base_sgpr, state->graphics_pipeline->vtx_emit_num); 6900bf215546Sopenharmony_ci radeon_emit(cs, first_task); 6901bf215546Sopenharmony_ci radeon_emit(cs, x); 6902bf215546Sopenharmony_ci radeon_emit(cs, y); 6903bf215546Sopenharmony_ci radeon_emit(cs, z); 6904bf215546Sopenharmony_ci 6905bf215546Sopenharmony_ci if (uses_drawid) { 6906bf215546Sopenharmony_ci radeon_emit(cs, 0); 6907bf215546Sopenharmony_ci state->last_drawid = 0; 6908bf215546Sopenharmony_ci } 6909bf215546Sopenharmony_ci} 6910bf215546Sopenharmony_ci 6911bf215546Sopenharmony_ciALWAYS_INLINE static void 6912bf215546Sopenharmony_ciradv_emit_userdata_mesh_first_task_0_draw_id_0(struct radv_cmd_buffer *cmd_buffer) 6913bf215546Sopenharmony_ci{ 6914bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6915bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 6916bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = state->graphics_pipeline; 6917bf215546Sopenharmony_ci const bool uses_drawid = pipeline->uses_drawid; 6918bf215546Sopenharmony_ci 6919bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr, 1); 
6920bf215546Sopenharmony_ci radeon_emit(cs, 0); 6921bf215546Sopenharmony_ci 6922bf215546Sopenharmony_ci if (uses_drawid) { 6923bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr + (pipeline->vtx_emit_num - 1) * 4, 1); 6924bf215546Sopenharmony_ci radeon_emit(cs, 0); 6925bf215546Sopenharmony_ci } 6926bf215546Sopenharmony_ci} 6927bf215546Sopenharmony_ci 6928bf215546Sopenharmony_ciALWAYS_INLINE static void 6929bf215546Sopenharmony_ciradv_emit_userdata_task_ib_only(struct radv_cmd_buffer *cmd_buffer, uint64_t ib_va, 6930bf215546Sopenharmony_ci uint32_t ib_stride) 6931bf215546Sopenharmony_ci{ 6932bf215546Sopenharmony_ci struct radv_pipeline *pipeline = &cmd_buffer->state.graphics_pipeline->base; 6933bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs; 6934bf215546Sopenharmony_ci 6935bf215546Sopenharmony_ci struct radv_userdata_info *task_ib_loc = 6936bf215546Sopenharmony_ci radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_TASK_IB); 6937bf215546Sopenharmony_ci 6938bf215546Sopenharmony_ci if (task_ib_loc->sgpr_idx != -1) { 6939bf215546Sopenharmony_ci assert(task_ib_loc->num_sgprs == 3); 6940bf215546Sopenharmony_ci unsigned task_ib_reg = R_00B900_COMPUTE_USER_DATA_0 + task_ib_loc->sgpr_idx * 4; 6941bf215546Sopenharmony_ci 6942bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, task_ib_reg, 3); 6943bf215546Sopenharmony_ci radeon_emit(cs, ib_va); 6944bf215546Sopenharmony_ci radeon_emit(cs, ib_va >> 32); 6945bf215546Sopenharmony_ci radeon_emit(cs, ib_stride); 6946bf215546Sopenharmony_ci } 6947bf215546Sopenharmony_ci} 6948bf215546Sopenharmony_ci 6949bf215546Sopenharmony_ciALWAYS_INLINE static void 6950bf215546Sopenharmony_ciradv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z, 6951bf215546Sopenharmony_ci uint32_t draw_id, uint32_t first_task, uint64_t ib_va) 6952bf215546Sopenharmony_ci{ 6953bf215546Sopenharmony_ci struct radv_pipeline *pipeline = 
&cmd_buffer->state.graphics_pipeline->base; 6954bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs; 6955bf215546Sopenharmony_ci 6956bf215546Sopenharmony_ci struct radv_userdata_info *xyz_loc = 6957bf215546Sopenharmony_ci radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_GRID_SIZE); 6958bf215546Sopenharmony_ci struct radv_userdata_info *draw_id_loc = 6959bf215546Sopenharmony_ci radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_TASK_DRAW_ID); 6960bf215546Sopenharmony_ci 6961bf215546Sopenharmony_ci if (xyz_loc->sgpr_idx != -1) { 6962bf215546Sopenharmony_ci assert(xyz_loc->num_sgprs == 3); 6963bf215546Sopenharmony_ci unsigned xyz_reg = R_00B900_COMPUTE_USER_DATA_0 + xyz_loc->sgpr_idx * 4; 6964bf215546Sopenharmony_ci 6965bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, xyz_reg, 3); 6966bf215546Sopenharmony_ci radeon_emit(cs, x); 6967bf215546Sopenharmony_ci radeon_emit(cs, y); 6968bf215546Sopenharmony_ci radeon_emit(cs, z); 6969bf215546Sopenharmony_ci } 6970bf215546Sopenharmony_ci 6971bf215546Sopenharmony_ci if (draw_id_loc->sgpr_idx != -1) { 6972bf215546Sopenharmony_ci assert(draw_id_loc->num_sgprs == 1); 6973bf215546Sopenharmony_ci unsigned draw_id_reg = R_00B900_COMPUTE_USER_DATA_0 + draw_id_loc->sgpr_idx * 4; 6974bf215546Sopenharmony_ci 6975bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, draw_id_reg, 1); 6976bf215546Sopenharmony_ci radeon_emit(cs, draw_id); 6977bf215546Sopenharmony_ci } 6978bf215546Sopenharmony_ci 6979bf215546Sopenharmony_ci radv_emit_userdata_task_ib_only(cmd_buffer, ib_va, first_task ? 
8 : 0); 6980bf215546Sopenharmony_ci} 6981bf215546Sopenharmony_ci 6982bf215546Sopenharmony_ciALWAYS_INLINE static void 6983bf215546Sopenharmony_ciradv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer, 6984bf215546Sopenharmony_ci const struct radv_draw_info *info, 6985bf215546Sopenharmony_ci uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *minfo, 6986bf215546Sopenharmony_ci uint32_t stride, 6987bf215546Sopenharmony_ci const int32_t *vertexOffset) 6988bf215546Sopenharmony_ci 6989bf215546Sopenharmony_ci{ 6990bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 6991bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 6992bf215546Sopenharmony_ci const int index_size = radv_get_vgt_index_size(state->index_type); 6993bf215546Sopenharmony_ci unsigned i = 0; 6994bf215546Sopenharmony_ci const bool uses_drawid = state->graphics_pipeline->uses_drawid; 6995bf215546Sopenharmony_ci const bool can_eop = 6996bf215546Sopenharmony_ci !uses_drawid && cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10; 6997bf215546Sopenharmony_ci 6998bf215546Sopenharmony_ci if (uses_drawid) { 6999bf215546Sopenharmony_ci if (vertexOffset) { 7000bf215546Sopenharmony_ci radv_emit_userdata_vertex(cmd_buffer, info, *vertexOffset); 7001bf215546Sopenharmony_ci vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) { 7002bf215546Sopenharmony_ci const uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex; 7003bf215546Sopenharmony_ci 7004bf215546Sopenharmony_ci /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */ 7005bf215546Sopenharmony_ci if (!remaining_indexes && 7006bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) 7007bf215546Sopenharmony_ci continue; 7008bf215546Sopenharmony_ci 7009bf215546Sopenharmony_ci if (i > 0) 7010bf215546Sopenharmony_ci radeon_set_sh_reg(cs, state->graphics_pipeline->vtx_base_sgpr + 
sizeof(uint32_t), i); 7011bf215546Sopenharmony_ci 7012bf215546Sopenharmony_ci const uint64_t index_va = state->index_va + draw->firstIndex * index_size; 7013bf215546Sopenharmony_ci 7014bf215546Sopenharmony_ci if (!state->subpass->view_mask) { 7015bf215546Sopenharmony_ci radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); 7016bf215546Sopenharmony_ci } else { 7017bf215546Sopenharmony_ci u_foreach_bit(view, state->subpass->view_mask) { 7018bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, view); 7019bf215546Sopenharmony_ci 7020bf215546Sopenharmony_ci radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); 7021bf215546Sopenharmony_ci } 7022bf215546Sopenharmony_ci } 7023bf215546Sopenharmony_ci } 7024bf215546Sopenharmony_ci } else { 7025bf215546Sopenharmony_ci vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) { 7026bf215546Sopenharmony_ci const uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex; 7027bf215546Sopenharmony_ci 7028bf215546Sopenharmony_ci /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */ 7029bf215546Sopenharmony_ci if (!remaining_indexes && 7030bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) 7031bf215546Sopenharmony_ci continue; 7032bf215546Sopenharmony_ci 7033bf215546Sopenharmony_ci if (i > 0) { 7034bf215546Sopenharmony_ci if (state->last_vertex_offset != draw->vertexOffset) 7035bf215546Sopenharmony_ci radv_emit_userdata_vertex_drawid(cmd_buffer, draw->vertexOffset, i); 7036bf215546Sopenharmony_ci else 7037bf215546Sopenharmony_ci radeon_set_sh_reg(cs, state->graphics_pipeline->vtx_base_sgpr + sizeof(uint32_t), i); 7038bf215546Sopenharmony_ci } else 7039bf215546Sopenharmony_ci radv_emit_userdata_vertex(cmd_buffer, info, draw->vertexOffset); 7040bf215546Sopenharmony_ci 7041bf215546Sopenharmony_ci const uint64_t index_va = 
state->index_va + draw->firstIndex * index_size; 7042bf215546Sopenharmony_ci 7043bf215546Sopenharmony_ci if (!state->subpass->view_mask) { 7044bf215546Sopenharmony_ci radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); 7045bf215546Sopenharmony_ci } else { 7046bf215546Sopenharmony_ci u_foreach_bit(view, state->subpass->view_mask) { 7047bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, view); 7048bf215546Sopenharmony_ci 7049bf215546Sopenharmony_ci radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); 7050bf215546Sopenharmony_ci } 7051bf215546Sopenharmony_ci } 7052bf215546Sopenharmony_ci } 7053bf215546Sopenharmony_ci } 7054bf215546Sopenharmony_ci if (drawCount > 1) { 7055bf215546Sopenharmony_ci state->last_drawid = drawCount - 1; 7056bf215546Sopenharmony_ci } 7057bf215546Sopenharmony_ci } else { 7058bf215546Sopenharmony_ci if (vertexOffset) { 7059bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10) { 7060bf215546Sopenharmony_ci /* GFX10 has a bug that consecutive draw packets with NOT_EOP must not have 7061bf215546Sopenharmony_ci * count == 0 for the last draw that doesn't have NOT_EOP. 
7062bf215546Sopenharmony_ci */ 7063bf215546Sopenharmony_ci while (drawCount > 1) { 7064bf215546Sopenharmony_ci const VkMultiDrawIndexedInfoEXT *last = (const VkMultiDrawIndexedInfoEXT*)(((const uint8_t*)minfo) + (drawCount - 1) * stride); 7065bf215546Sopenharmony_ci if (last->indexCount) 7066bf215546Sopenharmony_ci break; 7067bf215546Sopenharmony_ci drawCount--; 7068bf215546Sopenharmony_ci } 7069bf215546Sopenharmony_ci } 7070bf215546Sopenharmony_ci 7071bf215546Sopenharmony_ci radv_emit_userdata_vertex(cmd_buffer, info, *vertexOffset); 7072bf215546Sopenharmony_ci vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) { 7073bf215546Sopenharmony_ci const uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex; 7074bf215546Sopenharmony_ci 7075bf215546Sopenharmony_ci /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */ 7076bf215546Sopenharmony_ci if (!remaining_indexes && 7077bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) 7078bf215546Sopenharmony_ci continue; 7079bf215546Sopenharmony_ci 7080bf215546Sopenharmony_ci const uint64_t index_va = state->index_va + draw->firstIndex * index_size; 7081bf215546Sopenharmony_ci 7082bf215546Sopenharmony_ci if (!state->subpass->view_mask) { 7083bf215546Sopenharmony_ci radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, can_eop && i < drawCount - 1); 7084bf215546Sopenharmony_ci } else { 7085bf215546Sopenharmony_ci u_foreach_bit(view, state->subpass->view_mask) { 7086bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, view); 7087bf215546Sopenharmony_ci 7088bf215546Sopenharmony_ci radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); 7089bf215546Sopenharmony_ci } 7090bf215546Sopenharmony_ci } 7091bf215546Sopenharmony_ci } 7092bf215546Sopenharmony_ci } else { 7093bf215546Sopenharmony_ci vk_foreach_multi_draw_indexed(draw, i, 
minfo, drawCount, stride) { 7094bf215546Sopenharmony_ci const uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex; 7095bf215546Sopenharmony_ci 7096bf215546Sopenharmony_ci /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */ 7097bf215546Sopenharmony_ci if (!remaining_indexes && 7098bf215546Sopenharmony_ci cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) 7099bf215546Sopenharmony_ci continue; 7100bf215546Sopenharmony_ci 7101bf215546Sopenharmony_ci const VkMultiDrawIndexedInfoEXT *next = (const VkMultiDrawIndexedInfoEXT*)(i < drawCount - 1 ? ((uint8_t*)draw + stride) : NULL); 7102bf215546Sopenharmony_ci const bool offset_changes = next && next->vertexOffset != draw->vertexOffset; 7103bf215546Sopenharmony_ci radv_emit_userdata_vertex(cmd_buffer, info, draw->vertexOffset); 7104bf215546Sopenharmony_ci 7105bf215546Sopenharmony_ci const uint64_t index_va = state->index_va + draw->firstIndex * index_size; 7106bf215546Sopenharmony_ci 7107bf215546Sopenharmony_ci if (!state->subpass->view_mask) { 7108bf215546Sopenharmony_ci radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, can_eop && !offset_changes && i < drawCount - 1); 7109bf215546Sopenharmony_ci } else { 7110bf215546Sopenharmony_ci u_foreach_bit(view, state->subpass->view_mask) { 7111bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, view); 7112bf215546Sopenharmony_ci 7113bf215546Sopenharmony_ci radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false); 7114bf215546Sopenharmony_ci } 7115bf215546Sopenharmony_ci } 7116bf215546Sopenharmony_ci } 7117bf215546Sopenharmony_ci } 7118bf215546Sopenharmony_ci if (drawCount > 1) { 7119bf215546Sopenharmony_ci state->last_drawid = drawCount - 1; 7120bf215546Sopenharmony_ci } 7121bf215546Sopenharmony_ci } 7122bf215546Sopenharmony_ci} 7123bf215546Sopenharmony_ci 7124bf215546Sopenharmony_ciALWAYS_INLINE static 
void 7125bf215546Sopenharmony_ciradv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, 7126bf215546Sopenharmony_ci uint32_t drawCount, const VkMultiDrawInfoEXT *minfo, 7127bf215546Sopenharmony_ci uint32_t use_opaque, uint32_t stride) 7128bf215546Sopenharmony_ci{ 7129bf215546Sopenharmony_ci unsigned i = 0; 7130bf215546Sopenharmony_ci const uint32_t view_mask = cmd_buffer->state.subpass->view_mask; 7131bf215546Sopenharmony_ci const bool uses_drawid = cmd_buffer->state.graphics_pipeline->uses_drawid; 7132bf215546Sopenharmony_ci uint32_t last_start = 0; 7133bf215546Sopenharmony_ci 7134bf215546Sopenharmony_ci vk_foreach_multi_draw(draw, i, minfo, drawCount, stride) { 7135bf215546Sopenharmony_ci if (!i) 7136bf215546Sopenharmony_ci radv_emit_userdata_vertex(cmd_buffer, info, draw->firstVertex); 7137bf215546Sopenharmony_ci else 7138bf215546Sopenharmony_ci radv_emit_userdata_vertex_drawid(cmd_buffer, draw->firstVertex, uses_drawid ? i : 0); 7139bf215546Sopenharmony_ci 7140bf215546Sopenharmony_ci if (!view_mask) { 7141bf215546Sopenharmony_ci radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque); 7142bf215546Sopenharmony_ci } else { 7143bf215546Sopenharmony_ci u_foreach_bit(view, view_mask) { 7144bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, view); 7145bf215546Sopenharmony_ci radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque); 7146bf215546Sopenharmony_ci } 7147bf215546Sopenharmony_ci } 7148bf215546Sopenharmony_ci last_start = draw->firstVertex; 7149bf215546Sopenharmony_ci } 7150bf215546Sopenharmony_ci if (drawCount > 1) { 7151bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 7152bf215546Sopenharmony_ci state->last_vertex_offset = last_start; 7153bf215546Sopenharmony_ci if (uses_drawid) 7154bf215546Sopenharmony_ci state->last_drawid = drawCount - 1; 7155bf215546Sopenharmony_ci } 7156bf215546Sopenharmony_ci} 7157bf215546Sopenharmony_ci 
7158bf215546Sopenharmony_ciALWAYS_INLINE static void 7159bf215546Sopenharmony_ciradv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer, 7160bf215546Sopenharmony_ci uint32_t x, uint32_t y, uint32_t z, 7161bf215546Sopenharmony_ci uint32_t first_task) 7162bf215546Sopenharmony_ci{ 7163bf215546Sopenharmony_ci const uint32_t view_mask = cmd_buffer->state.subpass->view_mask; 7164bf215546Sopenharmony_ci const uint32_t count = x * y * z; 7165bf215546Sopenharmony_ci 7166bf215546Sopenharmony_ci radv_emit_userdata_mesh(cmd_buffer, x, y, z, first_task); 7167bf215546Sopenharmony_ci 7168bf215546Sopenharmony_ci if (!view_mask) { 7169bf215546Sopenharmony_ci radv_cs_emit_draw_packet(cmd_buffer, count, 0); 7170bf215546Sopenharmony_ci } else { 7171bf215546Sopenharmony_ci u_foreach_bit(view, view_mask) { 7172bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, view); 7173bf215546Sopenharmony_ci radv_cs_emit_draw_packet(cmd_buffer, count, 0); 7174bf215546Sopenharmony_ci } 7175bf215546Sopenharmony_ci } 7176bf215546Sopenharmony_ci} 7177bf215546Sopenharmony_ci 7178bf215546Sopenharmony_ciALWAYS_INLINE static void 7179bf215546Sopenharmony_ciradv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, 7180bf215546Sopenharmony_ci uint32_t z, uint32_t first_task) 7181bf215546Sopenharmony_ci{ 7182bf215546Sopenharmony_ci uint64_t fake_ib_va = 0; 7183bf215546Sopenharmony_ci const uint32_t view_mask = cmd_buffer->state.subpass->view_mask; 7184bf215546Sopenharmony_ci const unsigned num_views = MAX2(1, util_bitcount(view_mask)); 7185bf215546Sopenharmony_ci unsigned ace_predication_size = num_views * 6; /* DISPATCH_TASKMESH_DIRECT_ACE size */ 7186bf215546Sopenharmony_ci 7187bf215546Sopenharmony_ci if (first_task) { 7188bf215546Sopenharmony_ci /* Pass this as the IB to the shader for emulating firstTask in task shaders. 
*/ 7189bf215546Sopenharmony_ci uint32_t fake_ib_dwords[2] = {x, first_task}; 7190bf215546Sopenharmony_ci unsigned fake_ib_offset; 7191bf215546Sopenharmony_ci radv_cmd_buffer_upload_data(cmd_buffer, 8, fake_ib_dwords, &fake_ib_offset); 7192bf215546Sopenharmony_ci fake_ib_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + fake_ib_offset; 7193bf215546Sopenharmony_ci } 7194bf215546Sopenharmony_ci 7195bf215546Sopenharmony_ci radv_emit_userdata_task(cmd_buffer, x, y, z, 0, first_task, fake_ib_va); 7196bf215546Sopenharmony_ci radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer); 7197bf215546Sopenharmony_ci radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs, 7198bf215546Sopenharmony_ci cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted, 7199bf215546Sopenharmony_ci ace_predication_size); 7200bf215546Sopenharmony_ci 7201bf215546Sopenharmony_ci if (!view_mask) { 7202bf215546Sopenharmony_ci radv_cs_emit_dispatch_taskmesh_direct_ace_packet(cmd_buffer, x, y, z); 7203bf215546Sopenharmony_ci radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer); 7204bf215546Sopenharmony_ci } else { 7205bf215546Sopenharmony_ci u_foreach_bit (view, view_mask) { 7206bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, view); 7207bf215546Sopenharmony_ci radv_cs_emit_dispatch_taskmesh_direct_ace_packet(cmd_buffer, x, y, z); 7208bf215546Sopenharmony_ci radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer); 7209bf215546Sopenharmony_ci } 7210bf215546Sopenharmony_ci } 7211bf215546Sopenharmony_ci} 7212bf215546Sopenharmony_ci 7213bf215546Sopenharmony_cistatic void 7214bf215546Sopenharmony_ciradv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, 7215bf215546Sopenharmony_ci const struct radv_draw_info *info, uint64_t nv_ib_va, 7216bf215546Sopenharmony_ci uint32_t nv_ib_stride) 7217bf215546Sopenharmony_ci{ 7218bf215546Sopenharmony_ci const uint32_t view_mask = cmd_buffer->state.subpass->view_mask; 7219bf215546Sopenharmony_ci struct 
radeon_winsys *ws = cmd_buffer->device->ws; 7220bf215546Sopenharmony_ci const unsigned num_views = MAX2(1, util_bitcount(view_mask)); 7221bf215546Sopenharmony_ci unsigned ace_predication_size = num_views * 11; /* DISPATCH_TASKMESH_INDIRECT_MULTI_ACE size */ 7222bf215546Sopenharmony_ci struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; 7223bf215546Sopenharmony_ci 7224bf215546Sopenharmony_ci const uint64_t va = 7225bf215546Sopenharmony_ci radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; 7226bf215546Sopenharmony_ci const uint64_t count_va = !info->count_buffer 7227bf215546Sopenharmony_ci ? 0 7228bf215546Sopenharmony_ci : radv_buffer_get_va(info->count_buffer->bo) + 7229bf215546Sopenharmony_ci info->count_buffer->offset + info->count_buffer_offset; 7230bf215546Sopenharmony_ci uint64_t workaround_cond_va = 0; 7231bf215546Sopenharmony_ci 7232bf215546Sopenharmony_ci if (count_va) { 7233bf215546Sopenharmony_ci radv_cs_add_buffer(ws, cmd_buffer->ace_internal.cs, info->count_buffer->bo); 7234bf215546Sopenharmony_ci 7235bf215546Sopenharmony_ci /* MEC firmware bug workaround. 7236bf215546Sopenharmony_ci * When the count buffer contains zero, DISPATCH_TASKMESH_INDIRECT_MULTI_ACE hangs. 7237bf215546Sopenharmony_ci * - We must ensure that DISPATCH_TASKMESH_INDIRECT_MULTI_ACE 7238bf215546Sopenharmony_ci * is only executed when the count buffer contains non-zero. 7239bf215546Sopenharmony_ci * - Furthermore, we must also ensure that each DISPATCH_TASKMESH_GFX packet 7240bf215546Sopenharmony_ci * has a matching ACE packet. 
7241bf215546Sopenharmony_ci * 7242bf215546Sopenharmony_ci * As a workaround: 7243bf215546Sopenharmony_ci * - Reserve a dword in the upload buffer and initialize it to 1 for the workaround 7244bf215546Sopenharmony_ci * - When count != 0, write 0 to the workaround BO and execute the indirect dispatch 7245bf215546Sopenharmony_ci * - When workaround BO != 0 (count was 0), execute an empty direct dispatch 7246bf215546Sopenharmony_ci */ 7247bf215546Sopenharmony_ci 7248bf215546Sopenharmony_ci uint32_t workaround_cond_init = 0; 7249bf215546Sopenharmony_ci uint32_t workaround_cond_off; 7250bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &workaround_cond_init, &workaround_cond_off)) 7251bf215546Sopenharmony_ci cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY; 7252bf215546Sopenharmony_ci 7253bf215546Sopenharmony_ci workaround_cond_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + workaround_cond_off; 7254bf215546Sopenharmony_ci 7255bf215546Sopenharmony_ci radeon_emit(ace_cs, PKT3(PKT3_COPY_DATA, 4, 0)); 7256bf215546Sopenharmony_ci radeon_emit(ace_cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 7257bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 7258bf215546Sopenharmony_ci radeon_emit(ace_cs, 1); 7259bf215546Sopenharmony_ci radeon_emit(ace_cs, 0); 7260bf215546Sopenharmony_ci radeon_emit(ace_cs, workaround_cond_va); 7261bf215546Sopenharmony_ci radeon_emit(ace_cs, workaround_cond_va >> 32); 7262bf215546Sopenharmony_ci 7263bf215546Sopenharmony_ci /* 2x COND_EXEC + 1x COPY_DATA + Nx DISPATCH_TASKMESH_DIRECT_ACE */ 7264bf215546Sopenharmony_ci ace_predication_size += 2 * 5 + 6 + 6 * num_views; 7265bf215546Sopenharmony_ci } 7266bf215546Sopenharmony_ci 7267bf215546Sopenharmony_ci radv_cs_add_buffer(ws, cmd_buffer->ace_internal.cs, info->indirect->bo); 7268bf215546Sopenharmony_ci radv_emit_userdata_task_ib_only(cmd_buffer, nv_ib_va, nv_ib_stride); 7269bf215546Sopenharmony_ci 
radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer); 7270bf215546Sopenharmony_ci radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs, 7271bf215546Sopenharmony_ci cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted, 7272bf215546Sopenharmony_ci ace_predication_size); 7273bf215546Sopenharmony_ci 7274bf215546Sopenharmony_ci if (workaround_cond_va) { 7275bf215546Sopenharmony_ci radeon_emit(ace_cs, PKT3(PKT3_COND_EXEC, 3, 0)); 7276bf215546Sopenharmony_ci radeon_emit(ace_cs, count_va); 7277bf215546Sopenharmony_ci radeon_emit(ace_cs, count_va >> 32); 7278bf215546Sopenharmony_ci radeon_emit(ace_cs, 0); 7279bf215546Sopenharmony_ci radeon_emit(ace_cs, 7280bf215546Sopenharmony_ci 6 + 11 * num_views); /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */ 7281bf215546Sopenharmony_ci 7282bf215546Sopenharmony_ci radeon_emit(ace_cs, PKT3(PKT3_COPY_DATA, 4, 0)); 7283bf215546Sopenharmony_ci radeon_emit(ace_cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 7284bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 7285bf215546Sopenharmony_ci radeon_emit(ace_cs, 0); 7286bf215546Sopenharmony_ci radeon_emit(ace_cs, 0); 7287bf215546Sopenharmony_ci radeon_emit(ace_cs, workaround_cond_va); 7288bf215546Sopenharmony_ci radeon_emit(ace_cs, workaround_cond_va >> 32); 7289bf215546Sopenharmony_ci } 7290bf215546Sopenharmony_ci 7291bf215546Sopenharmony_ci if (!view_mask) { 7292bf215546Sopenharmony_ci radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count, 7293bf215546Sopenharmony_ci count_va, info->stride); 7294bf215546Sopenharmony_ci radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer); 7295bf215546Sopenharmony_ci } else { 7296bf215546Sopenharmony_ci u_foreach_bit (view, view_mask) { 7297bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, view); 7298bf215546Sopenharmony_ci radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count, 7299bf215546Sopenharmony_ci 
count_va, info->stride); 7300bf215546Sopenharmony_ci radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer); 7301bf215546Sopenharmony_ci } 7302bf215546Sopenharmony_ci } 7303bf215546Sopenharmony_ci 7304bf215546Sopenharmony_ci if (workaround_cond_va) { 7305bf215546Sopenharmony_ci radeon_emit(ace_cs, PKT3(PKT3_COND_EXEC, 3, 0)); 7306bf215546Sopenharmony_ci radeon_emit(ace_cs, workaround_cond_va); 7307bf215546Sopenharmony_ci radeon_emit(ace_cs, workaround_cond_va >> 32); 7308bf215546Sopenharmony_ci radeon_emit(ace_cs, 0); 7309bf215546Sopenharmony_ci radeon_emit(ace_cs, 6 * num_views); /* Nx DISPATCH_TASKMESH_DIRECT_ACE */ 7310bf215546Sopenharmony_ci 7311bf215546Sopenharmony_ci for (unsigned v = 0; v < num_views; ++v) { 7312bf215546Sopenharmony_ci radv_cs_emit_dispatch_taskmesh_direct_ace_packet(cmd_buffer, 0, 0, 0); 7313bf215546Sopenharmony_ci } 7314bf215546Sopenharmony_ci } 7315bf215546Sopenharmony_ci} 7316bf215546Sopenharmony_ci 7317bf215546Sopenharmony_cistatic void 7318bf215546Sopenharmony_ciradv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer, 7319bf215546Sopenharmony_ci const struct radv_draw_info *info) 7320bf215546Sopenharmony_ci{ 7321bf215546Sopenharmony_ci const struct radv_cmd_state *state = &cmd_buffer->state; 7322bf215546Sopenharmony_ci struct radeon_winsys *ws = cmd_buffer->device->ws; 7323bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 7324bf215546Sopenharmony_ci const uint64_t va = 7325bf215546Sopenharmony_ci radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset; 7326bf215546Sopenharmony_ci const uint64_t count_va = info->count_buffer 7327bf215546Sopenharmony_ci ? 
radv_buffer_get_va(info->count_buffer->bo) + 7328bf215546Sopenharmony_ci info->count_buffer->offset + info->count_buffer_offset 7329bf215546Sopenharmony_ci : 0; 7330bf215546Sopenharmony_ci 7331bf215546Sopenharmony_ci radv_cs_add_buffer(ws, cs, info->indirect->bo); 7332bf215546Sopenharmony_ci 7333bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0)); 7334bf215546Sopenharmony_ci radeon_emit(cs, 1); 7335bf215546Sopenharmony_ci radeon_emit(cs, va); 7336bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 7337bf215546Sopenharmony_ci 7338bf215546Sopenharmony_ci if (info->count_buffer) { 7339bf215546Sopenharmony_ci radv_cs_add_buffer(ws, cs, info->count_buffer->bo); 7340bf215546Sopenharmony_ci } 7341bf215546Sopenharmony_ci 7342bf215546Sopenharmony_ci if (!state->subpass->view_mask) { 7343bf215546Sopenharmony_ci radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, 7344bf215546Sopenharmony_ci info->stride); 7345bf215546Sopenharmony_ci } else { 7346bf215546Sopenharmony_ci u_foreach_bit(i, state->subpass->view_mask) 7347bf215546Sopenharmony_ci { 7348bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, i); 7349bf215546Sopenharmony_ci 7350bf215546Sopenharmony_ci radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va, 7351bf215546Sopenharmony_ci info->stride); 7352bf215546Sopenharmony_ci } 7353bf215546Sopenharmony_ci } 7354bf215546Sopenharmony_ci} 7355bf215546Sopenharmony_ci 7356bf215546Sopenharmony_ci/* 7357bf215546Sopenharmony_ci * Vega and raven have a bug which triggers if there are multiple context 7358bf215546Sopenharmony_ci * register contexts active at the same time with different scissor values. 7359bf215546Sopenharmony_ci * 7360bf215546Sopenharmony_ci * There are two possible workarounds: 7361bf215546Sopenharmony_ci * 1) Wait for PS_PARTIAL_FLUSH every time the scissor is changed. That way 7362bf215546Sopenharmony_ci * there is only ever 1 active set of scissor values at the same time. 
7363bf215546Sopenharmony_ci * 7364bf215546Sopenharmony_ci * 2) Whenever the hardware switches contexts we have to set the scissor 7365bf215546Sopenharmony_ci * registers again even if it is a noop. That way the new context gets 7366bf215546Sopenharmony_ci * the correct scissor values. 7367bf215546Sopenharmony_ci * 7368bf215546Sopenharmony_ci * This implements option 2. radv_need_late_scissor_emission needs to 7369bf215546Sopenharmony_ci * return true on affected HW if radv_emit_all_graphics_states sets 7370bf215546Sopenharmony_ci * any context registers. 7371bf215546Sopenharmony_ci */ 7372bf215546Sopenharmony_cistatic bool 7373bf215546Sopenharmony_ciradv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer, 7374bf215546Sopenharmony_ci const struct radv_draw_info *info) 7375bf215546Sopenharmony_ci{ 7376bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 7377bf215546Sopenharmony_ci 7378bf215546Sopenharmony_ci if (!cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug) 7379bf215546Sopenharmony_ci return false; 7380bf215546Sopenharmony_ci 7381bf215546Sopenharmony_ci if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer) 7382bf215546Sopenharmony_ci return true; 7383bf215546Sopenharmony_ci 7384bf215546Sopenharmony_ci uint64_t used_states = 7385bf215546Sopenharmony_ci cmd_buffer->state.graphics_pipeline->needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL; 7386bf215546Sopenharmony_ci 7387bf215546Sopenharmony_ci /* Index, vertex and streamout buffers don't change context regs, and 7388bf215546Sopenharmony_ci * pipeline is already handled. 
7389bf215546Sopenharmony_ci */ 7390bf215546Sopenharmony_ci used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER | 7391bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT | RADV_CMD_DIRTY_STREAMOUT_BUFFER | 7392bf215546Sopenharmony_ci RADV_CMD_DIRTY_PIPELINE); 7393bf215546Sopenharmony_ci 7394bf215546Sopenharmony_ci if (cmd_buffer->state.dirty & used_states) 7395bf215546Sopenharmony_ci return true; 7396bf215546Sopenharmony_ci 7397bf215546Sopenharmony_ci uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer); 7398bf215546Sopenharmony_ci 7399bf215546Sopenharmony_ci if (info->indexed && state->dynamic.primitive_restart_enable && 7400bf215546Sopenharmony_ci primitive_reset_index != state->last_primitive_reset_index) 7401bf215546Sopenharmony_ci return true; 7402bf215546Sopenharmony_ci 7403bf215546Sopenharmony_ci return false; 7404bf215546Sopenharmony_ci} 7405bf215546Sopenharmony_ci 7406bf215546Sopenharmony_ciALWAYS_INLINE static bool 7407bf215546Sopenharmony_ciradv_skip_ngg_culling(bool has_tess, const unsigned vtx_cnt, 7408bf215546Sopenharmony_ci bool indirect) 7409bf215546Sopenharmony_ci{ 7410bf215546Sopenharmony_ci /* If we have to draw only a few vertices, we get better latency if 7411bf215546Sopenharmony_ci * we disable NGG culling. 7412bf215546Sopenharmony_ci * 7413bf215546Sopenharmony_ci * When tessellation is used, what matters is the number of tessellated 7414bf215546Sopenharmony_ci * vertices, so let's always assume it's not a small draw. 
7415bf215546Sopenharmony_ci */ 7416bf215546Sopenharmony_ci return !has_tess && !indirect && vtx_cnt < 128; 7417bf215546Sopenharmony_ci} 7418bf215546Sopenharmony_ci 7419bf215546Sopenharmony_ciALWAYS_INLINE static uint32_t 7420bf215546Sopenharmony_ciradv_get_ngg_culling_settings(struct radv_cmd_buffer *cmd_buffer, bool vp_y_inverted) 7421bf215546Sopenharmony_ci{ 7422bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 7423bf215546Sopenharmony_ci const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 7424bf215546Sopenharmony_ci 7425bf215546Sopenharmony_ci /* Cull every triangle when rasterizer discard is enabled. */ 7426bf215546Sopenharmony_ci if (d->rasterizer_discard_enable || 7427bf215546Sopenharmony_ci G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.graphics_pipeline->pa_cl_clip_cntl)) 7428bf215546Sopenharmony_ci return radv_nggc_front_face | radv_nggc_back_face; 7429bf215546Sopenharmony_ci 7430bf215546Sopenharmony_ci uint32_t pa_su_sc_mode_cntl = cmd_buffer->state.graphics_pipeline->pa_su_sc_mode_cntl; 7431bf215546Sopenharmony_ci uint32_t nggc_settings = radv_nggc_none; 7432bf215546Sopenharmony_ci 7433bf215546Sopenharmony_ci /* The culling code needs to know whether face is CW or CCW. */ 7434bf215546Sopenharmony_ci bool ccw = (pipeline->needed_dynamic_state & RADV_DYNAMIC_FRONT_FACE) 7435bf215546Sopenharmony_ci ? d->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE 7436bf215546Sopenharmony_ci : G_028814_FACE(pa_su_sc_mode_cntl) == 0; 7437bf215546Sopenharmony_ci 7438bf215546Sopenharmony_ci /* Take inverted viewport into account. */ 7439bf215546Sopenharmony_ci ccw ^= vp_y_inverted; 7440bf215546Sopenharmony_ci 7441bf215546Sopenharmony_ci if (ccw) 7442bf215546Sopenharmony_ci nggc_settings |= radv_nggc_face_is_ccw; 7443bf215546Sopenharmony_ci 7444bf215546Sopenharmony_ci /* Face culling settings. 
*/ 7445bf215546Sopenharmony_ci if ((pipeline->needed_dynamic_state & RADV_DYNAMIC_CULL_MODE) 7446bf215546Sopenharmony_ci ? (d->cull_mode & VK_CULL_MODE_FRONT_BIT) 7447bf215546Sopenharmony_ci : G_028814_CULL_FRONT(pa_su_sc_mode_cntl)) 7448bf215546Sopenharmony_ci nggc_settings |= radv_nggc_front_face; 7449bf215546Sopenharmony_ci if ((pipeline->needed_dynamic_state & RADV_DYNAMIC_CULL_MODE) 7450bf215546Sopenharmony_ci ? (d->cull_mode & VK_CULL_MODE_BACK_BIT) 7451bf215546Sopenharmony_ci : G_028814_CULL_BACK(pa_su_sc_mode_cntl)) 7452bf215546Sopenharmony_ci nggc_settings |= radv_nggc_back_face; 7453bf215546Sopenharmony_ci 7454bf215546Sopenharmony_ci /* Small primitive culling is only valid when conservative overestimation is not used. It's also 7455bf215546Sopenharmony_ci * disabled for user sample locations because small primitive culling assumes a sample 7456bf215546Sopenharmony_ci * position at (0.5, 0.5). */ 7457bf215546Sopenharmony_ci if (!pipeline->uses_conservative_overestimate && !pipeline->uses_user_sample_locations) { 7458bf215546Sopenharmony_ci nggc_settings |= radv_nggc_small_primitives; 7459bf215546Sopenharmony_ci 7460bf215546Sopenharmony_ci /* small_prim_precision = num_samples / 2^subpixel_bits 7461bf215546Sopenharmony_ci * num_samples is also always a power of two, so the small prim precision can only be 7462bf215546Sopenharmony_ci * a power of two between 2^-2 and 2^-6, therefore it's enough to remember the exponent. 
7463bf215546Sopenharmony_ci */ 7464bf215546Sopenharmony_ci unsigned subpixel_bits = 256; 7465bf215546Sopenharmony_ci int32_t small_prim_precision_log2 = util_logbase2(pipeline->ms.num_samples) - util_logbase2(subpixel_bits); 7466bf215546Sopenharmony_ci nggc_settings |= ((uint32_t) small_prim_precision_log2 << 24u); 7467bf215546Sopenharmony_ci } 7468bf215546Sopenharmony_ci 7469bf215546Sopenharmony_ci return nggc_settings; 7470bf215546Sopenharmony_ci} 7471bf215546Sopenharmony_ci 7472bf215546Sopenharmony_cistatic void 7473bf215546Sopenharmony_ciradv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info) 7474bf215546Sopenharmony_ci{ 7475bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 7476bf215546Sopenharmony_ci const unsigned stage = pipeline->last_vgt_api_stage; 7477bf215546Sopenharmony_ci const bool nggc_supported = pipeline->has_ngg_culling; 7478bf215546Sopenharmony_ci 7479bf215546Sopenharmony_ci if (!nggc_supported && !cmd_buffer->state.last_nggc_settings) { 7480bf215546Sopenharmony_ci /* Current shader doesn't support culling and culling was already disabled: 7481bf215546Sopenharmony_ci * No further steps needed, just remember the SGPR's location is not set. 7482bf215546Sopenharmony_ci */ 7483bf215546Sopenharmony_ci cmd_buffer->state.last_nggc_settings_sgpr_idx = -1; 7484bf215546Sopenharmony_ci return; 7485bf215546Sopenharmony_ci } 7486bf215546Sopenharmony_ci 7487bf215546Sopenharmony_ci /* Check dirty flags: 7488bf215546Sopenharmony_ci * - Dirty pipeline: SGPR index may have changed (we have to re-emit if changed). 7489bf215546Sopenharmony_ci * - Dirty dynamic flags: culling settings may have changed. 
7490bf215546Sopenharmony_ci */ 7491bf215546Sopenharmony_ci const bool dirty = 7492bf215546Sopenharmony_ci cmd_buffer->state.dirty & 7493bf215546Sopenharmony_ci (RADV_CMD_DIRTY_PIPELINE | 7494bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE | 7495bf215546Sopenharmony_ci RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT); 7496bf215546Sopenharmony_ci 7497bf215546Sopenharmony_ci /* Check small draw status: 7498bf215546Sopenharmony_ci * For small draw calls, we disable culling by setting the SGPR to 0. 7499bf215546Sopenharmony_ci */ 7500bf215546Sopenharmony_ci const bool skip = 7501bf215546Sopenharmony_ci radv_skip_ngg_culling(stage == MESA_SHADER_TESS_EVAL, draw_info->count, draw_info->indirect); 7502bf215546Sopenharmony_ci 7503bf215546Sopenharmony_ci /* See if anything changed. */ 7504bf215546Sopenharmony_ci if (!dirty && skip == cmd_buffer->state.last_nggc_skip) 7505bf215546Sopenharmony_ci return; 7506bf215546Sopenharmony_ci 7507bf215546Sopenharmony_ci /* Remember small draw state. */ 7508bf215546Sopenharmony_ci cmd_buffer->state.last_nggc_skip = skip; 7509bf215546Sopenharmony_ci const struct radv_shader *v = pipeline->base.shaders[stage]; 7510bf215546Sopenharmony_ci assert(v->info.has_ngg_culling == nggc_supported); 7511bf215546Sopenharmony_ci 7512bf215546Sopenharmony_ci /* Find the user SGPR. */ 7513bf215546Sopenharmony_ci const uint32_t base_reg = pipeline->base.user_data_0[stage]; 7514bf215546Sopenharmony_ci const int8_t nggc_sgpr_idx = v->info.user_sgprs_locs.shader_data[AC_UD_NGG_CULLING_SETTINGS].sgpr_idx; 7515bf215546Sopenharmony_ci assert(!nggc_supported || nggc_sgpr_idx != -1); 7516bf215546Sopenharmony_ci 7517bf215546Sopenharmony_ci /* Get viewport transform. 
*/ 7518bf215546Sopenharmony_ci float vp_scale[2], vp_translate[2]; 7519bf215546Sopenharmony_ci memcpy(vp_scale, cmd_buffer->state.dynamic.viewport.xform[0].scale, 2 * sizeof(float)); 7520bf215546Sopenharmony_ci memcpy(vp_translate, cmd_buffer->state.dynamic.viewport.xform[0].translate, 2 * sizeof(float)); 7521bf215546Sopenharmony_ci bool vp_y_inverted = (-vp_scale[1] + vp_translate[1]) > (vp_scale[1] + vp_translate[1]); 7522bf215546Sopenharmony_ci 7523bf215546Sopenharmony_ci /* Get current culling settings. */ 7524bf215546Sopenharmony_ci uint32_t nggc_settings = nggc_supported && !skip 7525bf215546Sopenharmony_ci ? radv_get_ngg_culling_settings(cmd_buffer, vp_y_inverted) 7526bf215546Sopenharmony_ci : radv_nggc_none; 7527bf215546Sopenharmony_ci 7528bf215546Sopenharmony_ci bool emit_viewport = nggc_settings && 7529bf215546Sopenharmony_ci (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_VIEWPORT || 7530bf215546Sopenharmony_ci cmd_buffer->state.last_nggc_settings_sgpr_idx != nggc_sgpr_idx || 7531bf215546Sopenharmony_ci !cmd_buffer->state.last_nggc_settings); 7532bf215546Sopenharmony_ci 7533bf215546Sopenharmony_ci if (emit_viewport) { 7534bf215546Sopenharmony_ci /* Correction for inverted Y */ 7535bf215546Sopenharmony_ci if (vp_y_inverted) { 7536bf215546Sopenharmony_ci vp_scale[1] = -vp_scale[1]; 7537bf215546Sopenharmony_ci vp_translate[1] = -vp_translate[1]; 7538bf215546Sopenharmony_ci } 7539bf215546Sopenharmony_ci 7540bf215546Sopenharmony_ci /* Correction for number of samples per pixel. 
*/ 7541bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; ++i) { 7542bf215546Sopenharmony_ci vp_scale[i] *= (float) pipeline->ms.num_samples; 7543bf215546Sopenharmony_ci vp_translate[i] *= (float) pipeline->ms.num_samples; 7544bf215546Sopenharmony_ci } 7545bf215546Sopenharmony_ci 7546bf215546Sopenharmony_ci uint32_t vp_reg_values[4] = {fui(vp_scale[0]), fui(vp_scale[1]), fui(vp_translate[0]), fui(vp_translate[1])}; 7547bf215546Sopenharmony_ci const int8_t vp_sgpr_idx = v->info.user_sgprs_locs.shader_data[AC_UD_NGG_VIEWPORT].sgpr_idx; 7548bf215546Sopenharmony_ci assert(vp_sgpr_idx != -1); 7549bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + vp_sgpr_idx * 4, 4); 7550bf215546Sopenharmony_ci radeon_emit_array(cmd_buffer->cs, vp_reg_values, 4); 7551bf215546Sopenharmony_ci } 7552bf215546Sopenharmony_ci 7553bf215546Sopenharmony_ci bool emit_settings = nggc_supported && 7554bf215546Sopenharmony_ci (cmd_buffer->state.last_nggc_settings != nggc_settings || 7555bf215546Sopenharmony_ci cmd_buffer->state.last_nggc_settings_sgpr_idx != nggc_sgpr_idx); 7556bf215546Sopenharmony_ci 7557bf215546Sopenharmony_ci /* This needs to be emitted when culling is turned on 7558bf215546Sopenharmony_ci * and when it's already on but some settings change. 7559bf215546Sopenharmony_ci */ 7560bf215546Sopenharmony_ci if (emit_settings) { 7561bf215546Sopenharmony_ci assert(nggc_sgpr_idx >= 0); 7562bf215546Sopenharmony_ci radeon_set_sh_reg(cmd_buffer->cs, base_reg + nggc_sgpr_idx * 4, nggc_settings); 7563bf215546Sopenharmony_ci } 7564bf215546Sopenharmony_ci 7565bf215546Sopenharmony_ci /* These only need to be emitted when culling is turned on or off, 7566bf215546Sopenharmony_ci * but not when it stays on and just some settings change. 
7567bf215546Sopenharmony_ci */ 7568bf215546Sopenharmony_ci if (!!cmd_buffer->state.last_nggc_settings != !!nggc_settings) { 7569bf215546Sopenharmony_ci uint32_t rsrc2 = v->config.rsrc2; 7570bf215546Sopenharmony_ci 7571bf215546Sopenharmony_ci if (!nggc_settings) { 7572bf215546Sopenharmony_ci /* Allocate less LDS when culling is disabled. (But GS always needs it.) */ 7573bf215546Sopenharmony_ci if (stage != MESA_SHADER_GEOMETRY) 7574bf215546Sopenharmony_ci rsrc2 = (rsrc2 & C_00B22C_LDS_SIZE) | S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling); 7575bf215546Sopenharmony_ci } 7576bf215546Sopenharmony_ci 7577bf215546Sopenharmony_ci /* When the pipeline is dirty and not yet emitted, don't write it here 7578bf215546Sopenharmony_ci * because radv_emit_graphics_pipeline will overwrite this register. 7579bf215546Sopenharmony_ci */ 7580bf215546Sopenharmony_ci if (!(cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) || 7581bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline == pipeline) { 7582bf215546Sopenharmony_ci radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2); 7583bf215546Sopenharmony_ci } 7584bf215546Sopenharmony_ci } 7585bf215546Sopenharmony_ci 7586bf215546Sopenharmony_ci cmd_buffer->state.last_nggc_settings = nggc_settings; 7587bf215546Sopenharmony_ci cmd_buffer->state.last_nggc_settings_sgpr_idx = nggc_sgpr_idx; 7588bf215546Sopenharmony_ci} 7589bf215546Sopenharmony_ci 7590bf215546Sopenharmony_cistatic void 7591bf215546Sopenharmony_ciradv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, 7592bf215546Sopenharmony_ci bool pipeline_is_dirty) 7593bf215546Sopenharmony_ci{ 7594bf215546Sopenharmony_ci bool late_scissor_emission; 7595bf215546Sopenharmony_ci 7596bf215546Sopenharmony_ci if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) || 7597bf215546Sopenharmony_ci cmd_buffer->state.emitted_graphics_pipeline != cmd_buffer->state.graphics_pipeline) 7598bf215546Sopenharmony_ci 
radv_emit_rbplus_state(cmd_buffer); 7599bf215546Sopenharmony_ci 7600bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->use_ngg_culling && 7601bf215546Sopenharmony_ci cmd_buffer->state.graphics_pipeline->is_ngg) 7602bf215546Sopenharmony_ci radv_emit_ngg_culling_state(cmd_buffer, info); 7603bf215546Sopenharmony_ci 7604bf215546Sopenharmony_ci if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) 7605bf215546Sopenharmony_ci radv_emit_graphics_pipeline(cmd_buffer); 7606bf215546Sopenharmony_ci 7607bf215546Sopenharmony_ci /* This should be before the cmd_buffer->state.dirty is cleared 7608bf215546Sopenharmony_ci * (excluding RADV_CMD_DIRTY_PIPELINE) and after 7609bf215546Sopenharmony_ci * cmd_buffer->state.context_roll_without_scissor_emitted is set. */ 7610bf215546Sopenharmony_ci late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info); 7611bf215546Sopenharmony_ci 7612bf215546Sopenharmony_ci if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) 7613bf215546Sopenharmony_ci radv_emit_framebuffer_state(cmd_buffer); 7614bf215546Sopenharmony_ci 7615bf215546Sopenharmony_ci if (info->indexed) { 7616bf215546Sopenharmony_ci if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER) 7617bf215546Sopenharmony_ci radv_emit_index_buffer(cmd_buffer, info->indirect); 7618bf215546Sopenharmony_ci } else { 7619bf215546Sopenharmony_ci /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE, 7620bf215546Sopenharmony_ci * so the state must be re-emitted before the next indexed 7621bf215546Sopenharmony_ci * draw. 
7622bf215546Sopenharmony_ci */ 7623bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { 7624bf215546Sopenharmony_ci cmd_buffer->state.last_index_type = -1; 7625bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; 7626bf215546Sopenharmony_ci } 7627bf215546Sopenharmony_ci } 7628bf215546Sopenharmony_ci 7629bf215546Sopenharmony_ci if (cmd_buffer->device->force_vrs != RADV_FORCE_VRS_1x1) { 7630bf215546Sopenharmony_ci struct radv_dynamic_state *d = &cmd_buffer->state.dynamic; 7631bf215546Sopenharmony_ci uint64_t dynamic_states = 7632bf215546Sopenharmony_ci cmd_buffer->state.dirty & cmd_buffer->state.emitted_graphics_pipeline->needed_dynamic_state; 7633bf215546Sopenharmony_ci 7634bf215546Sopenharmony_ci if ((dynamic_states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE) && 7635bf215546Sopenharmony_ci d->fragment_shading_rate.size.width == 1 && 7636bf215546Sopenharmony_ci d->fragment_shading_rate.size.height == 1 && 7637bf215546Sopenharmony_ci d->fragment_shading_rate.combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR && 7638bf215546Sopenharmony_ci d->fragment_shading_rate.combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) { 7639bf215546Sopenharmony_ci /* When per-vertex VRS is forced and the dynamic fragment shading rate is a no-op, ignore 7640bf215546Sopenharmony_ci * it. This is needed for vkd3d-proton because it always declares per-draw VRS as dynamic. 
7641bf215546Sopenharmony_ci */ 7642bf215546Sopenharmony_ci cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE; 7643bf215546Sopenharmony_ci } 7644bf215546Sopenharmony_ci } 7645bf215546Sopenharmony_ci 7646bf215546Sopenharmony_ci radv_cmd_buffer_flush_dynamic_state(cmd_buffer, pipeline_is_dirty); 7647bf215546Sopenharmony_ci 7648bf215546Sopenharmony_ci radv_emit_draw_registers(cmd_buffer, info); 7649bf215546Sopenharmony_ci 7650bf215546Sopenharmony_ci if (late_scissor_emission) 7651bf215546Sopenharmony_ci radv_emit_scissor(cmd_buffer); 7652bf215546Sopenharmony_ci} 7653bf215546Sopenharmony_ci 7654bf215546Sopenharmony_ci/* MUST inline this function to avoid massive perf loss in drawoverhead */ 7655bf215546Sopenharmony_ciALWAYS_INLINE static bool 7656bf215546Sopenharmony_ciradv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount) 7657bf215546Sopenharmony_ci{ 7658bf215546Sopenharmony_ci const bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7; 7659bf215546Sopenharmony_ci const bool pipeline_is_dirty = (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) && 7660bf215546Sopenharmony_ci cmd_buffer->state.graphics_pipeline != cmd_buffer->state.emitted_graphics_pipeline; 7661bf215546Sopenharmony_ci 7662bf215546Sopenharmony_ci ASSERTED const unsigned cdw_max = 7663bf215546Sopenharmony_ci radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096 + 128 * (drawCount - 1)); 7664bf215546Sopenharmony_ci 7665bf215546Sopenharmony_ci if (likely(!info->indirect)) { 7666bf215546Sopenharmony_ci /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is 7667bf215546Sopenharmony_ci * no workaround for indirect draws, but we can at least skip 7668bf215546Sopenharmony_ci * direct draws. 
7669bf215546Sopenharmony_ci */ 7670bf215546Sopenharmony_ci if (unlikely(!info->instance_count)) 7671bf215546Sopenharmony_ci return false; 7672bf215546Sopenharmony_ci 7673bf215546Sopenharmony_ci /* Handle count == 0. */ 7674bf215546Sopenharmony_ci if (unlikely(!info->count && !info->strmout_buffer)) 7675bf215546Sopenharmony_ci return false; 7676bf215546Sopenharmony_ci } 7677bf215546Sopenharmony_ci 7678bf215546Sopenharmony_ci /* Need to apply this workaround early as it can set flush flags. */ 7679bf215546Sopenharmony_ci if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) 7680bf215546Sopenharmony_ci radv_emit_fb_mip_change_flush(cmd_buffer); 7681bf215546Sopenharmony_ci 7682bf215546Sopenharmony_ci /* Use optimal packet order based on whether we need to sync the 7683bf215546Sopenharmony_ci * pipeline. 7684bf215546Sopenharmony_ci */ 7685bf215546Sopenharmony_ci if (cmd_buffer->state.flush_bits & 7686bf215546Sopenharmony_ci (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | 7687bf215546Sopenharmony_ci RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { 7688bf215546Sopenharmony_ci /* If we have to wait for idle, set all states first, so that 7689bf215546Sopenharmony_ci * all SET packets are processed in parallel with previous draw 7690bf215546Sopenharmony_ci * calls. Then upload descriptors, set shader pointers, and 7691bf215546Sopenharmony_ci * draw, and prefetch at the end. This ensures that the time 7692bf215546Sopenharmony_ci * the CUs are idle is very short. 
(there are only SET_SH 7693bf215546Sopenharmony_ci * packets between the wait and the draw) 7694bf215546Sopenharmony_ci */ 7695bf215546Sopenharmony_ci radv_emit_all_graphics_states(cmd_buffer, info, pipeline_is_dirty); 7696bf215546Sopenharmony_ci si_emit_cache_flush(cmd_buffer); 7697bf215546Sopenharmony_ci /* <-- CUs are idle here --> */ 7698bf215546Sopenharmony_ci 7699bf215546Sopenharmony_ci radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty); 7700bf215546Sopenharmony_ci } else { 7701bf215546Sopenharmony_ci /* If we don't wait for idle, start prefetches first, then set 7702bf215546Sopenharmony_ci * states, and draw at the end. 7703bf215546Sopenharmony_ci */ 7704bf215546Sopenharmony_ci si_emit_cache_flush(cmd_buffer); 7705bf215546Sopenharmony_ci 7706bf215546Sopenharmony_ci if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) { 7707bf215546Sopenharmony_ci /* Only prefetch the vertex shader and VBO descriptors 7708bf215546Sopenharmony_ci * in order to start the draw as soon as possible. 
7709bf215546Sopenharmony_ci */ 7710bf215546Sopenharmony_ci radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.graphics_pipeline, true); 7711bf215546Sopenharmony_ci } 7712bf215546Sopenharmony_ci 7713bf215546Sopenharmony_ci radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty); 7714bf215546Sopenharmony_ci 7715bf215546Sopenharmony_ci radv_emit_all_graphics_states(cmd_buffer, info, pipeline_is_dirty); 7716bf215546Sopenharmony_ci } 7717bf215546Sopenharmony_ci 7718bf215546Sopenharmony_ci radv_describe_draw(cmd_buffer); 7719bf215546Sopenharmony_ci if (likely(!info->indirect)) { 7720bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 7721bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 7722bf215546Sopenharmony_ci assert(state->graphics_pipeline->vtx_base_sgpr); 7723bf215546Sopenharmony_ci if (state->last_num_instances != info->instance_count) { 7724bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false)); 7725bf215546Sopenharmony_ci radeon_emit(cs, info->instance_count); 7726bf215546Sopenharmony_ci state->last_num_instances = info->instance_count; 7727bf215546Sopenharmony_ci } 7728bf215546Sopenharmony_ci } 7729bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 7730bf215546Sopenharmony_ci 7731bf215546Sopenharmony_ci return true; 7732bf215546Sopenharmony_ci} 7733bf215546Sopenharmony_ci 7734bf215546Sopenharmony_ciALWAYS_INLINE static bool 7735bf215546Sopenharmony_ciradv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, 7736bf215546Sopenharmony_ci uint32_t drawCount) 7737bf215546Sopenharmony_ci{ 7738bf215546Sopenharmony_ci struct radv_descriptor_state *descriptors_state = 7739bf215546Sopenharmony_ci radv_get_descriptors_state(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS); 7740bf215546Sopenharmony_ci const bool pipeline_is_dirty = 7741bf215546Sopenharmony_ci cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE && 7742bf215546Sopenharmony_ci 
cmd_buffer->state.graphics_pipeline != cmd_buffer->state.emitted_graphics_pipeline; 7743bf215546Sopenharmony_ci const bool push_dirty = descriptors_state->push_dirty; 7744bf215546Sopenharmony_ci const uint32_t desc_dirty = descriptors_state->dirty; 7745bf215546Sopenharmony_ci 7746bf215546Sopenharmony_ci const bool gfx_result = radv_before_draw(cmd_buffer, info, drawCount); 7747bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 7748bf215546Sopenharmony_ci struct radv_shader *task_shader = radv_get_shader(&pipeline->base, MESA_SHADER_TASK); 7749bf215546Sopenharmony_ci 7750bf215546Sopenharmony_ci /* If there is no task shader, no need to do anything special. */ 7751bf215546Sopenharmony_ci if (!task_shader) 7752bf215546Sopenharmony_ci return gfx_result; 7753bf215546Sopenharmony_ci 7754bf215546Sopenharmony_ci /* Need to check the count even for indirect draws to work around 7755bf215546Sopenharmony_ci * an issue with DISPATCH_TASKMESH_INDIRECT_MULTI_ACE. 
7756bf215546Sopenharmony_ci */ 7757bf215546Sopenharmony_ci if (!info->count || !gfx_result) 7758bf215546Sopenharmony_ci return false; 7759bf215546Sopenharmony_ci 7760bf215546Sopenharmony_ci const bool need_task_semaphore = radv_flush_gfx2ace_semaphore(cmd_buffer); 7761bf215546Sopenharmony_ci struct radv_physical_device *pdevice = cmd_buffer->device->physical_device; 7762bf215546Sopenharmony_ci struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs; 7763bf215546Sopenharmony_ci struct radeon_winsys *ws = cmd_buffer->device->ws; 7764bf215546Sopenharmony_ci 7765bf215546Sopenharmony_ci assert(ace_cs); 7766bf215546Sopenharmony_ci ASSERTED const unsigned ace_cdw_max = 7767bf215546Sopenharmony_ci radeon_check_space(ws, ace_cs, 4096 + 128 * (drawCount - 1)); 7768bf215546Sopenharmony_ci 7769bf215546Sopenharmony_ci if (need_task_semaphore) 7770bf215546Sopenharmony_ci radv_wait_gfx2ace_semaphore(cmd_buffer); 7771bf215546Sopenharmony_ci 7772bf215546Sopenharmony_ci if (pipeline_is_dirty) { 7773bf215546Sopenharmony_ci radv_pipeline_emit_hw_cs(pdevice, ace_cs, task_shader); 7774bf215546Sopenharmony_ci radv_pipeline_emit_compute_state(pdevice, ace_cs, task_shader); 7775bf215546Sopenharmony_ci } 7776bf215546Sopenharmony_ci 7777bf215546Sopenharmony_ci radv_ace_internal_cache_flush(cmd_buffer); 7778bf215546Sopenharmony_ci 7779bf215546Sopenharmony_ci /* Restore dirty state of descriptors 7780bf215546Sopenharmony_ci * They were marked non-dirty in radv_before_draw, 7781bf215546Sopenharmony_ci * but they need to be re-emitted now to the ACE cmdbuf. 7782bf215546Sopenharmony_ci */ 7783bf215546Sopenharmony_ci descriptors_state->push_dirty = push_dirty; 7784bf215546Sopenharmony_ci descriptors_state->dirty = desc_dirty; 7785bf215546Sopenharmony_ci 7786bf215546Sopenharmony_ci /* Flush descriptors and push constants for task shaders. 
*/ 7787bf215546Sopenharmony_ci radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_TASK_BIT_NV, &pipeline->base, 7788bf215546Sopenharmony_ci VK_PIPELINE_BIND_POINT_GRAPHICS); 7789bf215546Sopenharmony_ci radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_TASK_BIT_NV, &pipeline->base, 7790bf215546Sopenharmony_ci VK_PIPELINE_BIND_POINT_GRAPHICS); 7791bf215546Sopenharmony_ci 7792bf215546Sopenharmony_ci assert(ace_cs->cdw <= ace_cdw_max); 7793bf215546Sopenharmony_ci return true; 7794bf215546Sopenharmony_ci} 7795bf215546Sopenharmony_ci 7796bf215546Sopenharmony_cistatic void 7797bf215546Sopenharmony_ciradv_after_draw(struct radv_cmd_buffer *cmd_buffer) 7798bf215546Sopenharmony_ci{ 7799bf215546Sopenharmony_ci const struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info; 7800bf215546Sopenharmony_ci bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7; 7801bf215546Sopenharmony_ci /* Start prefetches after the draw has been started. Both will 7802bf215546Sopenharmony_ci * run in parallel, but starting the draw first is more 7803bf215546Sopenharmony_ci * important. 7804bf215546Sopenharmony_ci */ 7805bf215546Sopenharmony_ci if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) { 7806bf215546Sopenharmony_ci radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.graphics_pipeline, false); 7807bf215546Sopenharmony_ci } 7808bf215546Sopenharmony_ci 7809bf215546Sopenharmony_ci /* Workaround for a VGT hang when streamout is enabled. 7810bf215546Sopenharmony_ci * It must be done after drawing. 
7811bf215546Sopenharmony_ci */ 7812bf215546Sopenharmony_ci if (radv_is_streamout_enabled(cmd_buffer) && 7813bf215546Sopenharmony_ci (rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA || 7814bf215546Sopenharmony_ci rad_info->family == CHIP_FIJI)) { 7815bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC; 7816bf215546Sopenharmony_ci } 7817bf215546Sopenharmony_ci 7818bf215546Sopenharmony_ci radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH); 7819bf215546Sopenharmony_ci} 7820bf215546Sopenharmony_ci 7821bf215546Sopenharmony_cistatic struct radv_buffer 7822bf215546Sopenharmony_ciradv_nv_mesh_indirect_bo(struct radv_cmd_buffer *cmd_buffer, 7823bf215546Sopenharmony_ci struct radv_buffer *buffer, VkDeviceSize offset, 7824bf215546Sopenharmony_ci uint32_t draw_count, uint32_t stride) 7825bf215546Sopenharmony_ci{ 7826bf215546Sopenharmony_ci /* Translates the indirect BO format used by NV_mesh_shader API 7827bf215546Sopenharmony_ci * to the BO format used by DRAW_INDIRECT / DRAW_INDIRECT_MULTI. 
7828bf215546Sopenharmony_ci */ 7829bf215546Sopenharmony_ci 7830bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 7831bf215546Sopenharmony_ci struct radeon_winsys *ws = cmd_buffer->device->ws; 7832bf215546Sopenharmony_ci 7833bf215546Sopenharmony_ci const size_t src_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV)); 7834bf215546Sopenharmony_ci const size_t dst_stride = sizeof(VkDrawIndirectCommand); 7835bf215546Sopenharmony_ci const size_t src_off_task_count = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount); 7836bf215546Sopenharmony_ci const size_t src_off_first_task = offsetof(VkDrawMeshTasksIndirectCommandNV, firstTask); 7837bf215546Sopenharmony_ci const size_t dst_off_vertex_count = offsetof(VkDrawIndirectCommand, vertexCount); 7838bf215546Sopenharmony_ci const size_t dst_off_first_vertex = offsetof(VkDrawIndirectCommand, firstVertex); 7839bf215546Sopenharmony_ci 7840bf215546Sopenharmony_ci /* Fill the buffer with all zeroes except instanceCount = 1. 7841bf215546Sopenharmony_ci * This helps emit fewer copy packets below. 7842bf215546Sopenharmony_ci */ 7843bf215546Sopenharmony_ci VkDrawIndirectCommand *fill_data = (VkDrawIndirectCommand *) alloca(dst_stride * draw_count); 7844bf215546Sopenharmony_ci const VkDrawIndirectCommand filler = { .instanceCount = 1 }; 7845bf215546Sopenharmony_ci for (unsigned i = 0; i < draw_count; ++i) 7846bf215546Sopenharmony_ci fill_data[i] = filler; 7847bf215546Sopenharmony_ci 7848bf215546Sopenharmony_ci /* We'll have to copy data from the API BO. */ 7849bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset; 7850bf215546Sopenharmony_ci radv_cs_add_buffer(ws, cs, buffer->bo); 7851bf215546Sopenharmony_ci 7852bf215546Sopenharmony_ci /* Allocate some space in the upload BO. 
*/ 7853bf215546Sopenharmony_ci unsigned out_offset; 7854bf215546Sopenharmony_ci radv_cmd_buffer_upload_data(cmd_buffer, dst_stride * draw_count, fill_data, &out_offset); 7855bf215546Sopenharmony_ci const uint64_t new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + out_offset; 7856bf215546Sopenharmony_ci 7857bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 12 * draw_count + 2); 7858bf215546Sopenharmony_ci 7859bf215546Sopenharmony_ci /* Copy data from the API BO so that the format is suitable for the 7860bf215546Sopenharmony_ci * indirect draw packet: 7861bf215546Sopenharmony_ci * - vertexCount = taskCount (copied here) 7862bf215546Sopenharmony_ci * - instanceCount = 1 (filled by CPU above) 7863bf215546Sopenharmony_ci * - firstVertex = firstTask (copied here) 7864bf215546Sopenharmony_ci * - firstInstance = 0 (filled by CPU above) 7865bf215546Sopenharmony_ci */ 7866bf215546Sopenharmony_ci for (unsigned i = 0; i < draw_count; ++i) { 7867bf215546Sopenharmony_ci const uint64_t src_task_count = va + i * src_stride + src_off_task_count; 7868bf215546Sopenharmony_ci const uint64_t src_first_task = va + i * src_stride + src_off_first_task; 7869bf215546Sopenharmony_ci const uint64_t dst_vertex_count = new_va + i * dst_stride + dst_off_vertex_count; 7870bf215546Sopenharmony_ci const uint64_t dst_first_vertex = new_va + i * dst_stride + dst_off_first_vertex; 7871bf215546Sopenharmony_ci 7872bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); 7873bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 7874bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 7875bf215546Sopenharmony_ci radeon_emit(cs, src_task_count); 7876bf215546Sopenharmony_ci radeon_emit(cs, src_task_count >> 32); 7877bf215546Sopenharmony_ci radeon_emit(cs, dst_vertex_count); 7878bf215546Sopenharmony_ci radeon_emit(cs, dst_vertex_count >> 32); 7879bf215546Sopenharmony_ci 
7880bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating)); 7881bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 7882bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 7883bf215546Sopenharmony_ci radeon_emit(cs, src_first_task); 7884bf215546Sopenharmony_ci radeon_emit(cs, src_first_task >> 32); 7885bf215546Sopenharmony_ci radeon_emit(cs, dst_first_vertex); 7886bf215546Sopenharmony_ci radeon_emit(cs, dst_first_vertex >> 32); 7887bf215546Sopenharmony_ci } 7888bf215546Sopenharmony_ci 7889bf215546Sopenharmony_ci /* Wait for the copies to finish */ 7890bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 7891bf215546Sopenharmony_ci radeon_emit(cs, 0); 7892bf215546Sopenharmony_ci 7893bf215546Sopenharmony_ci /* The draw packet can now use this buffer: */ 7894bf215546Sopenharmony_ci struct radv_buffer buf = *buffer; 7895bf215546Sopenharmony_ci buf.bo = cmd_buffer->upload.upload_bo; 7896bf215546Sopenharmony_ci buf.offset = out_offset; 7897bf215546Sopenharmony_ci 7898bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 7899bf215546Sopenharmony_ci 7900bf215546Sopenharmony_ci return buf; 7901bf215546Sopenharmony_ci} 7902bf215546Sopenharmony_ci

/**
 * Translate an NV_mesh_shader indirect buffer into the buffer layout used by
 * DISPATCH_TASKMESH_INDIRECT_MULTI_ACE.
 *
 * A VkDispatchIndirectCommand array pre-filled with X=0, Y=1, Z=1 is placed in
 * the command buffer's upload BO, then one COPY_DATA packet per draw is
 * recorded on the ACE command stream to copy taskCount into the X member.
 *
 * \param cmd_buffer  Command buffer being recorded.
 * \param buffer      Application buffer holding VkDrawMeshTasksIndirectCommandNV entries.
 * \param offset      Byte offset of the first entry inside \p buffer.
 * \param draw_count  Number of entries to translate; must be non-zero.
 * \param stride      Application stride between entries; raised to at least
 *                    sizeof(VkDrawMeshTasksIndirectCommandNV).
 * \return A copy of \p buffer whose bo/offset point at the translated data in
 *         the upload BO.
 */
static struct radv_buffer
radv_nv_task_indirect_bo(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
                         VkDeviceSize offset, uint32_t draw_count, uint32_t stride)
{
   assert(draw_count);
   static_assert(sizeof(VkDispatchIndirectCommand) == 12, "Incorrect size of taskmesh command.");

   struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs;
   struct radeon_winsys *winsys = cmd_buffer->device->ws;

   /* Element strides and member offsets of the source (NV) and destination (dispatch) layouts. */
   const size_t nv_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV));
   const size_t disp_stride = sizeof(VkDispatchIndirectCommand);
   const size_t nv_task_count_off = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount);
   const size_t disp_x_off = offsetof(VkDispatchIndirectCommand, x);

   const unsigned disp_bytes = disp_stride * draw_count;

   const uint64_t nv_va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
   radv_cs_add_buffer(winsys, ace_cs, buffer->bo);

   /* CPU-side template: every dispatch starts out as X=0, Y=1, Z=1. */
   VkDispatchIndirectCommand *defaults = (VkDispatchIndirectCommand *)alloca(disp_bytes);
   for (unsigned d = 0; d < draw_count; ++d)
      defaults[d] = (VkDispatchIndirectCommand){.x = 0, .y = 1, .z = 1};

   /* Put the template into the upload BO; its address is only read after the upload call. */
   unsigned upload_offset;
   ASSERTED bool uploaded =
      radv_cmd_buffer_upload_data(cmd_buffer, disp_bytes, defaults, &upload_offset);
   const uint64_t disp_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + upload_offset;
   assert(uploaded);

   /* Never copy from beyond the end of the application buffer (this can
    * otherwise happen e.g. for draws with an indirect count buffer). Entries
    * past the clamp keep X=0, Y=1, Z=1 which is harmless.
    */
   draw_count = MIN2(draw_count, (buffer->vk.size - buffer->offset - offset) / nv_stride);

   ASSERTED unsigned cdw_max = radeon_check_space(winsys, ace_cs, 6 * draw_count + 2);

   /* One COPY_DATA per draw: NV taskCount -> dispatch X. */
   for (unsigned d = 0; d < draw_count; ++d) {
      const uint64_t src_va = nv_va + d * nv_stride + nv_task_count_off;
      const uint64_t dst_va = disp_va + d * disp_stride + disp_x_off;

      radeon_emit(ace_cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
      radeon_emit(ace_cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) |
                             COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | COPY_DATA_WR_CONFIRM);
      radeon_emit(ace_cs, src_va);
      radeon_emit(ace_cs, src_va >> 32);
      radeon_emit(ace_cs, dst_va);
      radeon_emit(ace_cs, dst_va >> 32);
   }

   assert(ace_cs->cdw <= cdw_max);

   /* Hand back a buffer descriptor that points at the translated copy. */
   struct radv_buffer translated = *buffer;
   translated.bo = cmd_buffer->upload.upload_bo;
   translated.offset = upload_offset;

   return translated;
}

/* vkCmdDraw: a single non-indexed direct draw. */
VKAPI_ATTR void VKAPI_CALL
radv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
             uint32_t firstVertex, uint32_t firstInstance)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   const VkMultiDrawInfoEXT single_draw = {.firstVertex = firstVertex, .vertexCount = vertexCount};
   struct radv_draw_info draw;

   draw.indexed = false;
   draw.indirect = NULL;
   draw.strmout_buffer = NULL;
   draw.count = vertexCount;
   draw.instance_count = instanceCount;
   draw.first_instance = firstInstance;

   if (radv_before_draw(cmd_buffer, &draw, 1)) {
      radv_emit_direct_draw_packets(cmd_buffer, &draw, 1, &single_draw, 0, 0);
      radv_after_draw(cmd_buffer);
   }
}

/* vkCmdDrawMultiEXT: drawCount non-indexed direct draws read from pVertexInfo. */
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawMultiEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, const VkMultiDrawInfoEXT *pVertexInfo,
                     uint32_t instanceCount, uint32_t firstInstance, uint32_t stride)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_draw_info draw;

   /* Nothing to draw; pVertexInfo is not dereferenced in this case. */
   if (drawCount == 0)
      return;

   draw.indexed = false;
   draw.indirect = NULL;
   draw.strmout_buffer = NULL;
   draw.count = pVertexInfo->vertexCount;
   draw.instance_count = instanceCount;
   draw.first_instance = firstInstance;

   if (radv_before_draw(cmd_buffer, &draw, drawCount)) {
      radv_emit_direct_draw_packets(cmd_buffer, &draw, drawCount, pVertexInfo, 0, stride);
      radv_after_draw(cmd_buffer);
   }
}

/* vkCmdDrawIndexed: a single indexed direct draw. */
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
                    uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   const VkMultiDrawIndexedInfoEXT single_draw = {
      .firstIndex = firstIndex,
      .indexCount = indexCount,
      .vertexOffset = vertexOffset,
   };
   struct radv_draw_info draw;

   draw.indexed = true;
   draw.indirect = NULL;
   draw.strmout_buffer = NULL;
   draw.count = indexCount;
   draw.instance_count = instanceCount;
   draw.first_instance = firstInstance;

   if (radv_before_draw(cmd_buffer, &draw, 1)) {
      radv_emit_draw_packets_indexed(cmd_buffer, &draw, 1, &single_draw, 0, NULL);
      radv_after_draw(cmd_buffer);
   }
}

/* vkCmdDrawMultiIndexedEXT: drawCount indexed direct draws read from pIndexInfo. */
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *pIndexInfo,
                            uint32_t instanceCount, uint32_t firstInstance, uint32_t stride, const int32_t *pVertexOffset)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_draw_info draw;

   /* Nothing to draw; pIndexInfo is not dereferenced in this case. */
   if (drawCount == 0)
      return;

   draw.indexed = true;
   draw.indirect = NULL;
   draw.strmout_buffer = NULL;
   draw.count = pIndexInfo->indexCount;
   draw.instance_count = instanceCount;
   draw.first_instance = firstInstance;

   if (radv_before_draw(cmd_buffer, &draw, drawCount)) {
      radv_emit_draw_packets_indexed(cmd_buffer, &draw, drawCount, pIndexInfo, stride, pVertexOffset);
      radv_after_draw(cmd_buffer);
   }
}

/* vkCmdDrawIndirect: non-indexed draws whose parameters live in _buffer. */
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
                     uint32_t drawCount, uint32_t stride)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
   struct radv_draw_info draw;

   draw.indexed = false;
   draw.instance_count = 0;
   draw.strmout_buffer = NULL;
   draw.count_buffer = NULL;
   draw.indirect = buffer;
   draw.indirect_offset = offset;
   draw.stride = stride;
   draw.count = drawCount;

   if (radv_before_draw(cmd_buffer, &draw, 1)) {
      radv_emit_indirect_draw_packets(cmd_buffer, &draw);
      radv_after_draw(cmd_buffer);
   }
}

/* vkCmdDrawIndexedIndirect: indexed draws whose parameters live in _buffer. */
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
                            uint32_t drawCount, uint32_t stride)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
   struct radv_draw_info draw;

   draw.indexed = true;
   draw.instance_count = 0;
   draw.strmout_buffer = NULL;
   draw.count_buffer = NULL;
   draw.indirect = buffer;
   draw.indirect_offset = offset;
   draw.stride = stride;
   draw.count = drawCount;

   if (radv_before_draw(cmd_buffer, &draw, 1)) {
      radv_emit_indirect_draw_packets(cmd_buffer, &draw);
      radv_after_draw(cmd_buffer);
   }
}

/* vkCmdDrawIndirectCount: like radv_CmdDrawIndirect, with the draw count read
 * from _countBuffer and maxDrawCount recorded as the draw info's count.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
                          VkBuffer _countBuffer, VkDeviceSize countBufferOffset,
                          uint32_t maxDrawCount, uint32_t stride)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
   RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
   struct radv_draw_info draw;

   draw.indexed = false;
   draw.instance_count = 0;
   draw.strmout_buffer = NULL;
   draw.indirect = buffer;
   draw.indirect_offset = offset;
   draw.stride = stride;
   draw.count = maxDrawCount;
   draw.count_buffer = count_buffer;
   draw.count_buffer_offset = countBufferOffset;

   if (radv_before_draw(cmd_buffer, &draw, 1)) {
      radv_emit_indirect_draw_packets(cmd_buffer, &draw);
      radv_after_draw(cmd_buffer);
   }
}

/* vkCmdDrawIndexedIndirectCount: indexed variant of radv_CmdDrawIndirectCount. */
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
                                 VkDeviceSize offset, VkBuffer _countBuffer,
                                 VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
                                 uint32_t stride)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
   RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
   struct radv_draw_info draw;

   draw.indexed = true;
   draw.instance_count = 0;
   draw.strmout_buffer = NULL;
   draw.indirect = buffer;
   draw.indirect_offset = offset;
   draw.stride = stride;
   draw.count = maxDrawCount;
   draw.count_buffer = count_buffer;
   draw.count_buffer_offset = countBufferOffset;

   if (radv_before_draw(cmd_buffer, &draw, 1)) {
      radv_emit_indirect_draw_packets(cmd_buffer, &draw);
      radv_after_draw(cmd_buffer);
   }
}

/* vkCmdDrawMeshTasksNV: direct mesh draw, routed through the task+mesh path
 * when the bound graphics pipeline has a task shader stage.
 */
VKAPI_ATTR void VKAPI_CALL
radv_CmdDrawMeshTasksNV(VkCommandBuffer commandBuffer, uint32_t taskCount, uint32_t firstTask)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
   struct radv_draw_info draw;

   draw.indexed = false;
   draw.indirect = NULL;
   draw.count_buffer = NULL;
   draw.strmout_buffer = NULL;
   draw.stride = 0;
   draw.count = taskCount;
   draw.instance_count = 1;
   draw.first_instance = 0;

   if (!radv_before_taskmesh_draw(cmd_buffer, &draw, 1))
      return;

   if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_TASK))
      radv_emit_direct_mesh_draw_packet(cmd_buffer, taskCount, 1, 1, firstTask);
   else
      radv_emit_direct_taskmesh_draw_packets(cmd_buffer, taskCount, 1, 1, firstTask);

   radv_after_draw(cmd_buffer);
}

8190bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8191bf215546Sopenharmony_ciradv_CmdDrawMeshTasksIndirectNV(VkCommandBuffer commandBuffer, VkBuffer _buffer, 8192bf215546Sopenharmony_ci VkDeviceSize offset, uint32_t drawCount, uint32_t stride) 8193bf215546Sopenharmony_ci{ 8194bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8195bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 8196bf215546Sopenharmony_ci 8197bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 8198bf215546Sopenharmony_ci struct radv_draw_info info; 8199bf215546Sopenharmony_ci 8200bf215546Sopenharmony_ci info.indirect = buffer; 8201bf215546Sopenharmony_ci info.indirect_offset = offset; 8202bf215546Sopenharmony_ci info.stride = stride; 8203bf215546Sopenharmony_ci info.count = drawCount; 8204bf215546Sopenharmony_ci info.strmout_buffer = NULL; 8205bf215546Sopenharmony_ci info.count_buffer = NULL; 8206bf215546Sopenharmony_ci info.indexed = false; 8207bf215546Sopenharmony_ci info.instance_count = 0; 8208bf215546Sopenharmony_ci 8209bf215546Sopenharmony_ci if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount)) 8210bf215546Sopenharmony_ci return; 8211bf215546Sopenharmony_ci 8212bf215546Sopenharmony_ci /* Indirect draw with mesh shader only: 8213bf215546Sopenharmony_ci * Use DRAW_INDIRECT / DRAW_INDIRECT_MULTI like normal indirect draws. 8214bf215546Sopenharmony_ci * Needed because DISPATCH_MESH_INDIRECT_MULTI doesn't support firstTask. 8215bf215546Sopenharmony_ci * 8216bf215546Sopenharmony_ci * Indirect draw with task + mesh shaders: 8217bf215546Sopenharmony_ci * Use DISPATCH_TASKMESH_INDIRECT_MULTI_ACE + DISPATCH_TASKMESH_GFX. 8218bf215546Sopenharmony_ci * These packets don't support firstTask so we implement that by 8219bf215546Sopenharmony_ci * reading the NV command's indirect buffer in the shader. 
8220bf215546Sopenharmony_ci * 8221bf215546Sopenharmony_ci * The indirect BO layout from the NV_mesh_shader API is incompatible 8222bf215546Sopenharmony_ci * with AMD HW. To make it work, we allocate some space 8223bf215546Sopenharmony_ci * in the upload buffer and copy the data to it. 8224bf215546Sopenharmony_ci */ 8225bf215546Sopenharmony_ci 8226bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) { 8227bf215546Sopenharmony_ci uint64_t nv_ib_va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset; 8228bf215546Sopenharmony_ci uint32_t nv_ib_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV)); 8229bf215546Sopenharmony_ci struct radv_buffer buf = 8230bf215546Sopenharmony_ci radv_nv_task_indirect_bo(cmd_buffer, buffer, offset, drawCount, stride); 8231bf215546Sopenharmony_ci info.indirect = &buf; 8232bf215546Sopenharmony_ci info.indirect_offset = 0; 8233bf215546Sopenharmony_ci info.stride = sizeof(VkDispatchIndirectCommand); 8234bf215546Sopenharmony_ci 8235bf215546Sopenharmony_ci radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, nv_ib_va, nv_ib_stride); 8236bf215546Sopenharmony_ci } else { 8237bf215546Sopenharmony_ci struct radv_buffer buf = 8238bf215546Sopenharmony_ci radv_nv_mesh_indirect_bo(cmd_buffer, buffer, offset, drawCount, stride); 8239bf215546Sopenharmony_ci info.indirect = &buf; 8240bf215546Sopenharmony_ci info.indirect_offset = 0; 8241bf215546Sopenharmony_ci info.stride = sizeof(VkDrawIndirectCommand); 8242bf215546Sopenharmony_ci 8243bf215546Sopenharmony_ci radv_emit_indirect_draw_packets(cmd_buffer, &info); 8244bf215546Sopenharmony_ci } 8245bf215546Sopenharmony_ci 8246bf215546Sopenharmony_ci radv_after_draw(cmd_buffer); 8247bf215546Sopenharmony_ci} 8248bf215546Sopenharmony_ci 8249bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8250bf215546Sopenharmony_ciradv_CmdDrawMeshTasksIndirectCountNV(VkCommandBuffer commandBuffer, VkBuffer _buffer, 8251bf215546Sopenharmony_ci VkDeviceSize offset, VkBuffer 
_countBuffer, 8252bf215546Sopenharmony_ci VkDeviceSize countBufferOffset, uint32_t maxDrawCount, 8253bf215546Sopenharmony_ci uint32_t stride) 8254bf215546Sopenharmony_ci{ 8255bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8256bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 8257bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer); 8258bf215546Sopenharmony_ci 8259bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 8260bf215546Sopenharmony_ci struct radv_draw_info info; 8261bf215546Sopenharmony_ci 8262bf215546Sopenharmony_ci info.indirect = buffer; 8263bf215546Sopenharmony_ci info.indirect_offset = offset; 8264bf215546Sopenharmony_ci info.stride = stride; 8265bf215546Sopenharmony_ci info.count = maxDrawCount; 8266bf215546Sopenharmony_ci info.strmout_buffer = NULL; 8267bf215546Sopenharmony_ci info.count_buffer = count_buffer; 8268bf215546Sopenharmony_ci info.count_buffer_offset = countBufferOffset; 8269bf215546Sopenharmony_ci info.indexed = false; 8270bf215546Sopenharmony_ci info.instance_count = 0; 8271bf215546Sopenharmony_ci 8272bf215546Sopenharmony_ci if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount)) 8273bf215546Sopenharmony_ci return; 8274bf215546Sopenharmony_ci 8275bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) { 8276bf215546Sopenharmony_ci uint64_t nv_ib_va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset; 8277bf215546Sopenharmony_ci uint32_t nv_ib_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV)); 8278bf215546Sopenharmony_ci struct radv_buffer buf = 8279bf215546Sopenharmony_ci radv_nv_task_indirect_bo(cmd_buffer, buffer, offset, maxDrawCount, stride); 8280bf215546Sopenharmony_ci info.indirect = &buf; 8281bf215546Sopenharmony_ci info.indirect_offset = 0; 8282bf215546Sopenharmony_ci info.stride = sizeof(VkDispatchIndirectCommand); 
8283bf215546Sopenharmony_ci 8284bf215546Sopenharmony_ci radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, nv_ib_va, nv_ib_stride); 8285bf215546Sopenharmony_ci } else { 8286bf215546Sopenharmony_ci struct radv_buffer buf = 8287bf215546Sopenharmony_ci radv_nv_mesh_indirect_bo(cmd_buffer, buffer, offset, maxDrawCount, stride); 8288bf215546Sopenharmony_ci info.indirect = &buf; 8289bf215546Sopenharmony_ci info.indirect_offset = 0; 8290bf215546Sopenharmony_ci info.stride = sizeof(VkDrawIndirectCommand); 8291bf215546Sopenharmony_ci 8292bf215546Sopenharmony_ci radv_emit_indirect_draw_packets(cmd_buffer, &info); 8293bf215546Sopenharmony_ci } 8294bf215546Sopenharmony_ci 8295bf215546Sopenharmony_ci radv_after_draw(cmd_buffer); 8296bf215546Sopenharmony_ci} 8297bf215546Sopenharmony_ci 8298bf215546Sopenharmony_civoid 8299bf215546Sopenharmony_ciradv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed, 8300bf215546Sopenharmony_ci const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo) 8301bf215546Sopenharmony_ci{ 8302bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8303bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_indirect_command_layout, layout, 8304bf215546Sopenharmony_ci pGeneratedCommandsInfo->indirectCommandsLayout); 8305bf215546Sopenharmony_ci VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer); 8306bf215546Sopenharmony_ci 8307bf215546Sopenharmony_ci /* The only actions that can be done are draws, so skip on other queues. */ 8308bf215546Sopenharmony_ci if (cmd_buffer->qf != RADV_QUEUE_GENERAL) 8309bf215546Sopenharmony_ci return; 8310bf215546Sopenharmony_ci 8311bf215546Sopenharmony_ci /* Secondary command buffers are needed for the full extension but can't use 8312bf215546Sopenharmony_ci * PKT3_INDIRECT_BUFFER_CIK. 
8313bf215546Sopenharmony_ci */ 8314bf215546Sopenharmony_ci assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); 8315bf215546Sopenharmony_ci 8316bf215546Sopenharmony_ci radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo); 8317bf215546Sopenharmony_ci 8318bf215546Sopenharmony_ci struct radv_draw_info info; 8319bf215546Sopenharmony_ci 8320bf215546Sopenharmony_ci info.count = pGeneratedCommandsInfo->sequencesCount; 8321bf215546Sopenharmony_ci info.indirect = prep_buffer; /* We're not really going use it this way, but a good signal 8322bf215546Sopenharmony_ci that this is not direct. */ 8323bf215546Sopenharmony_ci info.indirect_offset = 0; 8324bf215546Sopenharmony_ci info.stride = 0; 8325bf215546Sopenharmony_ci info.strmout_buffer = NULL; 8326bf215546Sopenharmony_ci info.count_buffer = NULL; 8327bf215546Sopenharmony_ci info.indexed = layout->indexed; 8328bf215546Sopenharmony_ci info.instance_count = 0; 8329bf215546Sopenharmony_ci 8330bf215546Sopenharmony_ci if (!radv_before_draw(cmd_buffer, &info, 1)) 8331bf215546Sopenharmony_ci return; 8332bf215546Sopenharmony_ci 8333bf215546Sopenharmony_ci uint32_t cmdbuf_size = radv_get_indirect_cmdbuf_size(pGeneratedCommandsInfo); 8334bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset + 8335bf215546Sopenharmony_ci pGeneratedCommandsInfo->preprocessOffset; 8336bf215546Sopenharmony_ci const uint32_t view_mask = cmd_buffer->state.subpass->view_mask; 8337bf215546Sopenharmony_ci 8338bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating)); 8339bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0); 8340bf215546Sopenharmony_ci 8341bf215546Sopenharmony_ci if (!view_mask) { 8342bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); 8343bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va); 8344bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va >> 32); 8345bf215546Sopenharmony_ci 
radeon_emit(cmd_buffer->cs, cmdbuf_size >> 2); 8346bf215546Sopenharmony_ci } else { 8347bf215546Sopenharmony_ci u_foreach_bit (view, view_mask) { 8348bf215546Sopenharmony_ci radv_emit_view_index(cmd_buffer, view); 8349bf215546Sopenharmony_ci 8350bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)); 8351bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va); 8352bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, va >> 32); 8353bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, cmdbuf_size >> 2); 8354bf215546Sopenharmony_ci } 8355bf215546Sopenharmony_ci } 8356bf215546Sopenharmony_ci 8357bf215546Sopenharmony_ci if (layout->binds_index_buffer) { 8358bf215546Sopenharmony_ci cmd_buffer->state.last_index_type = -1; 8359bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER; 8360bf215546Sopenharmony_ci } 8361bf215546Sopenharmony_ci 8362bf215546Sopenharmony_ci if (layout->bind_vbo_mask) 8363bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER; 8364bf215546Sopenharmony_ci 8365bf215546Sopenharmony_ci if (layout->binds_state) 8366bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE; 8367bf215546Sopenharmony_ci 8368bf215546Sopenharmony_ci cmd_buffer->push_constant_stages |= ~0; 8369bf215546Sopenharmony_ci 8370bf215546Sopenharmony_ci cmd_buffer->state.last_index_type = -1; 8371bf215546Sopenharmony_ci cmd_buffer->state.last_num_instances = -1; 8372bf215546Sopenharmony_ci cmd_buffer->state.last_vertex_offset = -1; 8373bf215546Sopenharmony_ci cmd_buffer->state.last_first_instance = -1; 8374bf215546Sopenharmony_ci cmd_buffer->state.last_drawid = -1; 8375bf215546Sopenharmony_ci 8376bf215546Sopenharmony_ci radv_after_draw(cmd_buffer); 8377bf215546Sopenharmony_ci} 8378bf215546Sopenharmony_ci 8379bf215546Sopenharmony_cistruct radv_dispatch_info { 8380bf215546Sopenharmony_ci /** 8381bf215546Sopenharmony_ci * Determine the layout of the grid (in block units) to be 
used. 8382bf215546Sopenharmony_ci */ 8383bf215546Sopenharmony_ci uint32_t blocks[3]; 8384bf215546Sopenharmony_ci 8385bf215546Sopenharmony_ci /** 8386bf215546Sopenharmony_ci * A starting offset for the grid. If unaligned is set, the offset 8387bf215546Sopenharmony_ci * must still be aligned. 8388bf215546Sopenharmony_ci */ 8389bf215546Sopenharmony_ci uint32_t offsets[3]; 8390bf215546Sopenharmony_ci /** 8391bf215546Sopenharmony_ci * Whether it's an unaligned compute dispatch. 8392bf215546Sopenharmony_ci */ 8393bf215546Sopenharmony_ci bool unaligned; 8394bf215546Sopenharmony_ci 8395bf215546Sopenharmony_ci /** 8396bf215546Sopenharmony_ci * Indirect compute parameters resource. 8397bf215546Sopenharmony_ci */ 8398bf215546Sopenharmony_ci struct radeon_winsys_bo *indirect; 8399bf215546Sopenharmony_ci uint64_t va; 8400bf215546Sopenharmony_ci}; 8401bf215546Sopenharmony_ci 8402bf215546Sopenharmony_cistatic void 8403bf215546Sopenharmony_ciradv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer, 8404bf215546Sopenharmony_ci struct radv_compute_pipeline *pipeline, 8405bf215546Sopenharmony_ci const struct radv_dispatch_info *info) 8406bf215546Sopenharmony_ci{ 8407bf215546Sopenharmony_ci struct radv_shader *compute_shader = pipeline->base.shaders[MESA_SHADER_COMPUTE]; 8408bf215546Sopenharmony_ci unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator; 8409bf215546Sopenharmony_ci struct radeon_winsys *ws = cmd_buffer->device->ws; 8410bf215546Sopenharmony_ci bool predicating = cmd_buffer->state.predicating; 8411bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 8412bf215546Sopenharmony_ci struct radv_userdata_info *loc; 8413bf215546Sopenharmony_ci 8414bf215546Sopenharmony_ci radv_describe_dispatch(cmd_buffer, info->blocks[0], info->blocks[1], info->blocks[2]); 8415bf215546Sopenharmony_ci 8416bf215546Sopenharmony_ci loc = radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE); 8417bf215546Sopenharmony_ci 
8418bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 30); 8419bf215546Sopenharmony_ci 8420bf215546Sopenharmony_ci if (compute_shader->info.wave_size == 32) { 8421bf215546Sopenharmony_ci assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10); 8422bf215546Sopenharmony_ci dispatch_initiator |= S_00B800_CS_W32_EN(1); 8423bf215546Sopenharmony_ci } 8424bf215546Sopenharmony_ci 8425bf215546Sopenharmony_ci if (info->va) { 8426bf215546Sopenharmony_ci if (info->indirect) 8427bf215546Sopenharmony_ci radv_cs_add_buffer(ws, cs, info->indirect); 8428bf215546Sopenharmony_ci 8429bf215546Sopenharmony_ci if (info->unaligned) { 8430bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); 8431bf215546Sopenharmony_ci radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0])); 8432bf215546Sopenharmony_ci radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1])); 8433bf215546Sopenharmony_ci radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2])); 8434bf215546Sopenharmony_ci 8435bf215546Sopenharmony_ci dispatch_initiator |= S_00B800_USE_THREAD_DIMENSIONS(1); 8436bf215546Sopenharmony_ci } 8437bf215546Sopenharmony_ci 8438bf215546Sopenharmony_ci if (loc->sgpr_idx != -1) { 8439bf215546Sopenharmony_ci unsigned reg = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4; 8440bf215546Sopenharmony_ci 8441bf215546Sopenharmony_ci if (cmd_buffer->device->load_grid_size_from_user_sgpr) { 8442bf215546Sopenharmony_ci assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3); 8443bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0)); 8444bf215546Sopenharmony_ci radeon_emit(cs, info->va); 8445bf215546Sopenharmony_ci radeon_emit(cs, info->va >> 32); 8446bf215546Sopenharmony_ci radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2); 8447bf215546Sopenharmony_ci radeon_emit(cs, 3); 8448bf215546Sopenharmony_ci } else { 
8449bf215546Sopenharmony_ci radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, reg, info->va, true); 8450bf215546Sopenharmony_ci } 8451bf215546Sopenharmony_ci } 8452bf215546Sopenharmony_ci 8453bf215546Sopenharmony_ci if (radv_cmd_buffer_uses_mec(cmd_buffer)) { 8454bf215546Sopenharmony_ci radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va, 8455bf215546Sopenharmony_ci &cmd_buffer->mec_inv_pred_emitted, 8456bf215546Sopenharmony_ci 4 /* DISPATCH_INDIRECT size */); 8457bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1)); 8458bf215546Sopenharmony_ci radeon_emit(cs, info->va); 8459bf215546Sopenharmony_ci radeon_emit(cs, info->va >> 32); 8460bf215546Sopenharmony_ci radeon_emit(cs, dispatch_initiator); 8461bf215546Sopenharmony_ci } else { 8462bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | PKT3_SHADER_TYPE_S(1)); 8463bf215546Sopenharmony_ci radeon_emit(cs, 1); 8464bf215546Sopenharmony_ci radeon_emit(cs, info->va); 8465bf215546Sopenharmony_ci radeon_emit(cs, info->va >> 32); 8466bf215546Sopenharmony_ci 8467bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, predicating) | PKT3_SHADER_TYPE_S(1)); 8468bf215546Sopenharmony_ci radeon_emit(cs, 0); 8469bf215546Sopenharmony_ci radeon_emit(cs, dispatch_initiator); 8470bf215546Sopenharmony_ci } 8471bf215546Sopenharmony_ci } else { 8472bf215546Sopenharmony_ci unsigned blocks[3] = {info->blocks[0], info->blocks[1], info->blocks[2]}; 8473bf215546Sopenharmony_ci unsigned offsets[3] = {info->offsets[0], info->offsets[1], info->offsets[2]}; 8474bf215546Sopenharmony_ci 8475bf215546Sopenharmony_ci if (info->unaligned) { 8476bf215546Sopenharmony_ci unsigned *cs_block_size = compute_shader->info.cs.block_size; 8477bf215546Sopenharmony_ci unsigned remainder[3]; 8478bf215546Sopenharmony_ci 8479bf215546Sopenharmony_ci /* If aligned, these should be an entire block size, 8480bf215546Sopenharmony_ci * not 0. 
8481bf215546Sopenharmony_ci */ 8482bf215546Sopenharmony_ci remainder[0] = blocks[0] + cs_block_size[0] - align_u32_npot(blocks[0], cs_block_size[0]); 8483bf215546Sopenharmony_ci remainder[1] = blocks[1] + cs_block_size[1] - align_u32_npot(blocks[1], cs_block_size[1]); 8484bf215546Sopenharmony_ci remainder[2] = blocks[2] + cs_block_size[2] - align_u32_npot(blocks[2], cs_block_size[2]); 8485bf215546Sopenharmony_ci 8486bf215546Sopenharmony_ci blocks[0] = round_up_u32(blocks[0], cs_block_size[0]); 8487bf215546Sopenharmony_ci blocks[1] = round_up_u32(blocks[1], cs_block_size[1]); 8488bf215546Sopenharmony_ci blocks[2] = round_up_u32(blocks[2], cs_block_size[2]); 8489bf215546Sopenharmony_ci 8490bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; ++i) { 8491bf215546Sopenharmony_ci assert(offsets[i] % cs_block_size[i] == 0); 8492bf215546Sopenharmony_ci offsets[i] /= cs_block_size[i]; 8493bf215546Sopenharmony_ci } 8494bf215546Sopenharmony_ci 8495bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); 8496bf215546Sopenharmony_ci radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) | 8497bf215546Sopenharmony_ci S_00B81C_NUM_THREAD_PARTIAL(remainder[0])); 8498bf215546Sopenharmony_ci radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) | 8499bf215546Sopenharmony_ci S_00B81C_NUM_THREAD_PARTIAL(remainder[1])); 8500bf215546Sopenharmony_ci radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) | 8501bf215546Sopenharmony_ci S_00B81C_NUM_THREAD_PARTIAL(remainder[2])); 8502bf215546Sopenharmony_ci 8503bf215546Sopenharmony_ci dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1); 8504bf215546Sopenharmony_ci } 8505bf215546Sopenharmony_ci 8506bf215546Sopenharmony_ci if (loc->sgpr_idx != -1) { 8507bf215546Sopenharmony_ci if (cmd_buffer->device->load_grid_size_from_user_sgpr) { 8508bf215546Sopenharmony_ci assert(loc->num_sgprs == 3); 8509bf215546Sopenharmony_ci 8510bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + 
loc->sgpr_idx * 4, 3); 8511bf215546Sopenharmony_ci radeon_emit(cs, blocks[0]); 8512bf215546Sopenharmony_ci radeon_emit(cs, blocks[1]); 8513bf215546Sopenharmony_ci radeon_emit(cs, blocks[2]); 8514bf215546Sopenharmony_ci } else { 8515bf215546Sopenharmony_ci uint32_t offset; 8516bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_data(cmd_buffer, 12, blocks, &offset)) 8517bf215546Sopenharmony_ci return; 8518bf215546Sopenharmony_ci 8519bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset; 8520bf215546Sopenharmony_ci radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, 8521bf215546Sopenharmony_ci R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, va, true); 8522bf215546Sopenharmony_ci } 8523bf215546Sopenharmony_ci } 8524bf215546Sopenharmony_ci 8525bf215546Sopenharmony_ci if (offsets[0] || offsets[1] || offsets[2]) { 8526bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3); 8527bf215546Sopenharmony_ci radeon_emit(cs, offsets[0]); 8528bf215546Sopenharmony_ci radeon_emit(cs, offsets[1]); 8529bf215546Sopenharmony_ci radeon_emit(cs, offsets[2]); 8530bf215546Sopenharmony_ci 8531bf215546Sopenharmony_ci /* The blocks in the packet are not counts but end values. 
*/ 8532bf215546Sopenharmony_ci for (unsigned i = 0; i < 3; ++i) 8533bf215546Sopenharmony_ci blocks[i] += offsets[i]; 8534bf215546Sopenharmony_ci } else { 8535bf215546Sopenharmony_ci dispatch_initiator |= S_00B800_FORCE_START_AT_000(1); 8536bf215546Sopenharmony_ci } 8537bf215546Sopenharmony_ci 8538bf215546Sopenharmony_ci if (radv_cmd_buffer_uses_mec(cmd_buffer)) { 8539bf215546Sopenharmony_ci radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va, 8540bf215546Sopenharmony_ci &cmd_buffer->mec_inv_pred_emitted, 8541bf215546Sopenharmony_ci 5 /* DISPATCH_DIRECT size */); 8542bf215546Sopenharmony_ci predicating = false; 8543bf215546Sopenharmony_ci } 8544bf215546Sopenharmony_ci 8545bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1)); 8546bf215546Sopenharmony_ci radeon_emit(cs, blocks[0]); 8547bf215546Sopenharmony_ci radeon_emit(cs, blocks[1]); 8548bf215546Sopenharmony_ci radeon_emit(cs, blocks[2]); 8549bf215546Sopenharmony_ci radeon_emit(cs, dispatch_initiator); 8550bf215546Sopenharmony_ci } 8551bf215546Sopenharmony_ci 8552bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 8553bf215546Sopenharmony_ci} 8554bf215546Sopenharmony_ci 8555bf215546Sopenharmony_cistatic void 8556bf215546Sopenharmony_ciradv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, 8557bf215546Sopenharmony_ci struct radv_compute_pipeline *pipeline, 8558bf215546Sopenharmony_ci VkPipelineBindPoint bind_point) 8559bf215546Sopenharmony_ci{ 8560bf215546Sopenharmony_ci radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT, &pipeline->base, bind_point); 8561bf215546Sopenharmony_ci radv_flush_constants(cmd_buffer, 8562bf215546Sopenharmony_ci bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR 8563bf215546Sopenharmony_ci ? 
RADV_RT_STAGE_BITS 8564bf215546Sopenharmony_ci : VK_SHADER_STAGE_COMPUTE_BIT, 8565bf215546Sopenharmony_ci &pipeline->base, bind_point); 8566bf215546Sopenharmony_ci} 8567bf215546Sopenharmony_ci 8568bf215546Sopenharmony_cistatic void 8569bf215546Sopenharmony_ciradv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info, 8570bf215546Sopenharmony_ci struct radv_compute_pipeline *pipeline, VkPipelineBindPoint bind_point) 8571bf215546Sopenharmony_ci{ 8572bf215546Sopenharmony_ci bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7; 8573bf215546Sopenharmony_ci bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline; 8574bf215546Sopenharmony_ci 8575bf215546Sopenharmony_ci if (pipeline->cs_regalloc_hang_bug) 8576bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH | 8577bf215546Sopenharmony_ci RADV_CMD_FLAG_CS_PARTIAL_FLUSH; 8578bf215546Sopenharmony_ci 8579bf215546Sopenharmony_ci if (cmd_buffer->state.flush_bits & 8580bf215546Sopenharmony_ci (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB | 8581bf215546Sopenharmony_ci RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) { 8582bf215546Sopenharmony_ci /* If we have to wait for idle, set all states first, so that 8583bf215546Sopenharmony_ci * all SET packets are processed in parallel with previous draw 8584bf215546Sopenharmony_ci * calls. Then upload descriptors, set shader pointers, and 8585bf215546Sopenharmony_ci * dispatch, and prefetch at the end. This ensures that the 8586bf215546Sopenharmony_ci * time the CUs are idle is very short. 
(there are only SET_SH 8587bf215546Sopenharmony_ci * packets between the wait and the draw) 8588bf215546Sopenharmony_ci */ 8589bf215546Sopenharmony_ci radv_emit_compute_pipeline(cmd_buffer, pipeline); 8590bf215546Sopenharmony_ci si_emit_cache_flush(cmd_buffer); 8591bf215546Sopenharmony_ci /* <-- CUs are idle here --> */ 8592bf215546Sopenharmony_ci 8593bf215546Sopenharmony_ci radv_upload_compute_shader_descriptors(cmd_buffer, pipeline, bind_point); 8594bf215546Sopenharmony_ci 8595bf215546Sopenharmony_ci radv_emit_dispatch_packets(cmd_buffer, pipeline, info); 8596bf215546Sopenharmony_ci /* <-- CUs are busy here --> */ 8597bf215546Sopenharmony_ci 8598bf215546Sopenharmony_ci /* Start prefetches after the dispatch has been started. Both 8599bf215546Sopenharmony_ci * will run in parallel, but starting the dispatch first is 8600bf215546Sopenharmony_ci * more important. 8601bf215546Sopenharmony_ci */ 8602bf215546Sopenharmony_ci if (has_prefetch && pipeline_is_dirty) { 8603bf215546Sopenharmony_ci radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_COMPUTE]); 8604bf215546Sopenharmony_ci } 8605bf215546Sopenharmony_ci } else { 8606bf215546Sopenharmony_ci /* If we don't wait for idle, start prefetches first, then set 8607bf215546Sopenharmony_ci * states, and dispatch at the end. 
8608bf215546Sopenharmony_ci */ 8609bf215546Sopenharmony_ci si_emit_cache_flush(cmd_buffer); 8610bf215546Sopenharmony_ci 8611bf215546Sopenharmony_ci if (has_prefetch && pipeline_is_dirty) { 8612bf215546Sopenharmony_ci radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_COMPUTE]); 8613bf215546Sopenharmony_ci } 8614bf215546Sopenharmony_ci 8615bf215546Sopenharmony_ci radv_upload_compute_shader_descriptors(cmd_buffer, pipeline, bind_point); 8616bf215546Sopenharmony_ci 8617bf215546Sopenharmony_ci radv_emit_compute_pipeline(cmd_buffer, pipeline); 8618bf215546Sopenharmony_ci radv_emit_dispatch_packets(cmd_buffer, pipeline, info); 8619bf215546Sopenharmony_ci } 8620bf215546Sopenharmony_ci 8621bf215546Sopenharmony_ci if (pipeline_is_dirty) { 8622bf215546Sopenharmony_ci /* Raytracing uses compute shaders but has separate bind points and pipelines. 8623bf215546Sopenharmony_ci * So if we set compute userdata & shader registers we should dirty the raytracing 8624bf215546Sopenharmony_ci * ones and the other way around. 8625bf215546Sopenharmony_ci * 8626bf215546Sopenharmony_ci * We only need to do this when the pipeline is dirty because when we switch between 8627bf215546Sopenharmony_ci * the two we always need to switch pipelines. 8628bf215546Sopenharmony_ci */ 8629bf215546Sopenharmony_ci radv_mark_descriptor_sets_dirty(cmd_buffer, bind_point == VK_PIPELINE_BIND_POINT_COMPUTE 8630bf215546Sopenharmony_ci ? 
VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR 8631bf215546Sopenharmony_ci : VK_PIPELINE_BIND_POINT_COMPUTE); 8632bf215546Sopenharmony_ci } 8633bf215546Sopenharmony_ci 8634bf215546Sopenharmony_ci if (pipeline->cs_regalloc_hang_bug) 8635bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH; 8636bf215546Sopenharmony_ci 8637bf215546Sopenharmony_ci radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH); 8638bf215546Sopenharmony_ci} 8639bf215546Sopenharmony_ci 8640bf215546Sopenharmony_cistatic void 8641bf215546Sopenharmony_ciradv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info) 8642bf215546Sopenharmony_ci{ 8643bf215546Sopenharmony_ci radv_dispatch(cmd_buffer, info, cmd_buffer->state.compute_pipeline, 8644bf215546Sopenharmony_ci VK_PIPELINE_BIND_POINT_COMPUTE); 8645bf215546Sopenharmony_ci} 8646bf215546Sopenharmony_ci 8647bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8648bf215546Sopenharmony_ciradv_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t base_x, uint32_t base_y, 8649bf215546Sopenharmony_ci uint32_t base_z, uint32_t x, uint32_t y, uint32_t z) 8650bf215546Sopenharmony_ci{ 8651bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8652bf215546Sopenharmony_ci struct radv_dispatch_info info = {0}; 8653bf215546Sopenharmony_ci 8654bf215546Sopenharmony_ci info.blocks[0] = x; 8655bf215546Sopenharmony_ci info.blocks[1] = y; 8656bf215546Sopenharmony_ci info.blocks[2] = z; 8657bf215546Sopenharmony_ci 8658bf215546Sopenharmony_ci info.offsets[0] = base_x; 8659bf215546Sopenharmony_ci info.offsets[1] = base_y; 8660bf215546Sopenharmony_ci info.offsets[2] = base_z; 8661bf215546Sopenharmony_ci radv_compute_dispatch(cmd_buffer, &info); 8662bf215546Sopenharmony_ci} 8663bf215546Sopenharmony_ci 8664bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8665bf215546Sopenharmony_ciradv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z) 
8666bf215546Sopenharmony_ci{ 8667bf215546Sopenharmony_ci radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z); 8668bf215546Sopenharmony_ci} 8669bf215546Sopenharmony_ci 8670bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8671bf215546Sopenharmony_ciradv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset) 8672bf215546Sopenharmony_ci{ 8673bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8674bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, _buffer); 8675bf215546Sopenharmony_ci struct radv_dispatch_info info = {0}; 8676bf215546Sopenharmony_ci 8677bf215546Sopenharmony_ci info.indirect = buffer->bo; 8678bf215546Sopenharmony_ci info.va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset; 8679bf215546Sopenharmony_ci 8680bf215546Sopenharmony_ci radv_compute_dispatch(cmd_buffer, &info); 8681bf215546Sopenharmony_ci} 8682bf215546Sopenharmony_ci 8683bf215546Sopenharmony_civoid 8684bf215546Sopenharmony_ciradv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z) 8685bf215546Sopenharmony_ci{ 8686bf215546Sopenharmony_ci struct radv_dispatch_info info = {0}; 8687bf215546Sopenharmony_ci 8688bf215546Sopenharmony_ci info.blocks[0] = x; 8689bf215546Sopenharmony_ci info.blocks[1] = y; 8690bf215546Sopenharmony_ci info.blocks[2] = z; 8691bf215546Sopenharmony_ci info.unaligned = 1; 8692bf215546Sopenharmony_ci 8693bf215546Sopenharmony_ci radv_compute_dispatch(cmd_buffer, &info); 8694bf215546Sopenharmony_ci} 8695bf215546Sopenharmony_ci 8696bf215546Sopenharmony_civoid 8697bf215546Sopenharmony_ciradv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t va) 8698bf215546Sopenharmony_ci{ 8699bf215546Sopenharmony_ci struct radv_dispatch_info info = {0}; 8700bf215546Sopenharmony_ci 8701bf215546Sopenharmony_ci info.indirect = bo; 8702bf215546Sopenharmony_ci info.va = va; 8703bf215546Sopenharmony_ci 8704bf215546Sopenharmony_ci 
radv_compute_dispatch(cmd_buffer, &info); 8705bf215546Sopenharmony_ci} 8706bf215546Sopenharmony_ci 8707bf215546Sopenharmony_cienum radv_rt_mode { 8708bf215546Sopenharmony_ci radv_rt_mode_direct, 8709bf215546Sopenharmony_ci radv_rt_mode_indirect, 8710bf215546Sopenharmony_ci radv_rt_mode_indirect2, 8711bf215546Sopenharmony_ci}; 8712bf215546Sopenharmony_ci 8713bf215546Sopenharmony_cistatic void 8714bf215546Sopenharmony_ciradv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCommand2KHR *tables, 8715bf215546Sopenharmony_ci uint64_t indirect_va, enum radv_rt_mode mode) 8716bf215546Sopenharmony_ci{ 8717bf215546Sopenharmony_ci struct radv_compute_pipeline *pipeline = cmd_buffer->state.rt_pipeline; 8718bf215546Sopenharmony_ci uint32_t base_reg = pipeline->base.user_data_0[MESA_SHADER_COMPUTE]; 8719bf215546Sopenharmony_ci 8720bf215546Sopenharmony_ci struct radv_dispatch_info info = {0}; 8721bf215546Sopenharmony_ci info.unaligned = true; 8722bf215546Sopenharmony_ci 8723bf215546Sopenharmony_ci uint64_t launch_size_va; 8724bf215546Sopenharmony_ci uint64_t sbt_va; 8725bf215546Sopenharmony_ci 8726bf215546Sopenharmony_ci if (mode != radv_rt_mode_indirect2) { 8727bf215546Sopenharmony_ci uint32_t upload_size = mode == radv_rt_mode_direct 8728bf215546Sopenharmony_ci ? sizeof(VkTraceRaysIndirectCommand2KHR) 8729bf215546Sopenharmony_ci : offsetof(VkTraceRaysIndirectCommand2KHR, width); 8730bf215546Sopenharmony_ci 8731bf215546Sopenharmony_ci uint32_t offset; 8732bf215546Sopenharmony_ci if (!radv_cmd_buffer_upload_data(cmd_buffer, upload_size, tables, &offset)) 8733bf215546Sopenharmony_ci return; 8734bf215546Sopenharmony_ci 8735bf215546Sopenharmony_ci uint64_t upload_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset; 8736bf215546Sopenharmony_ci 8737bf215546Sopenharmony_ci launch_size_va = (mode == radv_rt_mode_direct) 8738bf215546Sopenharmony_ci ? 
upload_va + offsetof(VkTraceRaysIndirectCommand2KHR, width) 8739bf215546Sopenharmony_ci : indirect_va; 8740bf215546Sopenharmony_ci sbt_va = upload_va; 8741bf215546Sopenharmony_ci } else { 8742bf215546Sopenharmony_ci launch_size_va = indirect_va + offsetof(VkTraceRaysIndirectCommand2KHR, width); 8743bf215546Sopenharmony_ci sbt_va = indirect_va; 8744bf215546Sopenharmony_ci } 8745bf215546Sopenharmony_ci 8746bf215546Sopenharmony_ci if (mode == radv_rt_mode_direct) { 8747bf215546Sopenharmony_ci info.blocks[0] = tables->width; 8748bf215546Sopenharmony_ci info.blocks[1] = tables->height; 8749bf215546Sopenharmony_ci info.blocks[2] = tables->depth; 8750bf215546Sopenharmony_ci } else 8751bf215546Sopenharmony_ci info.va = launch_size_va; 8752bf215546Sopenharmony_ci 8753bf215546Sopenharmony_ci struct radv_userdata_info *desc_loc = 8754bf215546Sopenharmony_ci radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_COMPUTE, AC_UD_CS_SBT_DESCRIPTORS); 8755bf215546Sopenharmony_ci if (desc_loc->sgpr_idx != -1) { 8756bf215546Sopenharmony_ci radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, 8757bf215546Sopenharmony_ci base_reg + desc_loc->sgpr_idx * 4, sbt_va, true); 8758bf215546Sopenharmony_ci } 8759bf215546Sopenharmony_ci 8760bf215546Sopenharmony_ci struct radv_userdata_info *size_loc = 8761bf215546Sopenharmony_ci radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_COMPUTE, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR); 8762bf215546Sopenharmony_ci if (size_loc->sgpr_idx != -1) { 8763bf215546Sopenharmony_ci radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, 8764bf215546Sopenharmony_ci base_reg + size_loc->sgpr_idx * 4, launch_size_va, true); 8765bf215546Sopenharmony_ci } 8766bf215546Sopenharmony_ci 8767bf215546Sopenharmony_ci radv_dispatch(cmd_buffer, &info, pipeline, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR); 8768bf215546Sopenharmony_ci} 8769bf215546Sopenharmony_ci 8770bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8771bf215546Sopenharmony_ciradv_CmdTraceRaysKHR(VkCommandBuffer 
commandBuffer, 8772bf215546Sopenharmony_ci const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable, 8773bf215546Sopenharmony_ci const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable, 8774bf215546Sopenharmony_ci const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable, 8775bf215546Sopenharmony_ci const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, 8776bf215546Sopenharmony_ci uint32_t width, uint32_t height, uint32_t depth) 8777bf215546Sopenharmony_ci{ 8778bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8779bf215546Sopenharmony_ci 8780bf215546Sopenharmony_ci VkTraceRaysIndirectCommand2KHR tables = { 8781bf215546Sopenharmony_ci .raygenShaderRecordAddress = pRaygenShaderBindingTable->deviceAddress, 8782bf215546Sopenharmony_ci .raygenShaderRecordSize = pRaygenShaderBindingTable->size, 8783bf215546Sopenharmony_ci .missShaderBindingTableAddress = pMissShaderBindingTable->deviceAddress, 8784bf215546Sopenharmony_ci .missShaderBindingTableSize = pMissShaderBindingTable->size, 8785bf215546Sopenharmony_ci .missShaderBindingTableStride = pMissShaderBindingTable->stride, 8786bf215546Sopenharmony_ci .hitShaderBindingTableAddress = pHitShaderBindingTable->deviceAddress, 8787bf215546Sopenharmony_ci .hitShaderBindingTableSize = pHitShaderBindingTable->size, 8788bf215546Sopenharmony_ci .hitShaderBindingTableStride = pHitShaderBindingTable->stride, 8789bf215546Sopenharmony_ci .callableShaderBindingTableAddress = pCallableShaderBindingTable->deviceAddress, 8790bf215546Sopenharmony_ci .callableShaderBindingTableSize = pCallableShaderBindingTable->size, 8791bf215546Sopenharmony_ci .callableShaderBindingTableStride = pCallableShaderBindingTable->stride, 8792bf215546Sopenharmony_ci .width = width, 8793bf215546Sopenharmony_ci .height = height, 8794bf215546Sopenharmony_ci .depth = depth, 8795bf215546Sopenharmony_ci }; 8796bf215546Sopenharmony_ci 8797bf215546Sopenharmony_ci radv_trace_rays(cmd_buffer, &tables, 0, 
radv_rt_mode_direct); 8798bf215546Sopenharmony_ci} 8799bf215546Sopenharmony_ci 8800bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8801bf215546Sopenharmony_ciradv_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer, 8802bf215546Sopenharmony_ci const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable, 8803bf215546Sopenharmony_ci const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable, 8804bf215546Sopenharmony_ci const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable, 8805bf215546Sopenharmony_ci const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable, 8806bf215546Sopenharmony_ci VkDeviceAddress indirectDeviceAddress) 8807bf215546Sopenharmony_ci{ 8808bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8809bf215546Sopenharmony_ci 8810bf215546Sopenharmony_ci assert(cmd_buffer->device->use_global_bo_list); 8811bf215546Sopenharmony_ci 8812bf215546Sopenharmony_ci VkTraceRaysIndirectCommand2KHR tables = { 8813bf215546Sopenharmony_ci .raygenShaderRecordAddress = pRaygenShaderBindingTable->deviceAddress, 8814bf215546Sopenharmony_ci .raygenShaderRecordSize = pRaygenShaderBindingTable->size, 8815bf215546Sopenharmony_ci .missShaderBindingTableAddress = pMissShaderBindingTable->deviceAddress, 8816bf215546Sopenharmony_ci .missShaderBindingTableSize = pMissShaderBindingTable->size, 8817bf215546Sopenharmony_ci .missShaderBindingTableStride = pMissShaderBindingTable->stride, 8818bf215546Sopenharmony_ci .hitShaderBindingTableAddress = pHitShaderBindingTable->deviceAddress, 8819bf215546Sopenharmony_ci .hitShaderBindingTableSize = pHitShaderBindingTable->size, 8820bf215546Sopenharmony_ci .hitShaderBindingTableStride = pHitShaderBindingTable->stride, 8821bf215546Sopenharmony_ci .callableShaderBindingTableAddress = pCallableShaderBindingTable->deviceAddress, 8822bf215546Sopenharmony_ci .callableShaderBindingTableSize = pCallableShaderBindingTable->size, 8823bf215546Sopenharmony_ci .callableShaderBindingTableStride = 
pCallableShaderBindingTable->stride, 8824bf215546Sopenharmony_ci }; 8825bf215546Sopenharmony_ci 8826bf215546Sopenharmony_ci radv_trace_rays(cmd_buffer, &tables, indirectDeviceAddress, radv_rt_mode_indirect); 8827bf215546Sopenharmony_ci} 8828bf215546Sopenharmony_ci 8829bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8830bf215546Sopenharmony_ciradv_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress) 8831bf215546Sopenharmony_ci{ 8832bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8833bf215546Sopenharmony_ci 8834bf215546Sopenharmony_ci assert(cmd_buffer->device->use_global_bo_list); 8835bf215546Sopenharmony_ci 8836bf215546Sopenharmony_ci radv_trace_rays(cmd_buffer, NULL, indirectDeviceAddress, radv_rt_mode_indirect2); 8837bf215546Sopenharmony_ci} 8838bf215546Sopenharmony_ci 8839bf215546Sopenharmony_cistatic void 8840bf215546Sopenharmony_ciradv_set_rt_stack_size(struct radv_cmd_buffer *cmd_buffer, uint32_t size) 8841bf215546Sopenharmony_ci{ 8842bf215546Sopenharmony_ci unsigned wave_size = 0; 8843bf215546Sopenharmony_ci unsigned scratch_bytes_per_wave = 0; 8844bf215546Sopenharmony_ci 8845bf215546Sopenharmony_ci if (cmd_buffer->state.rt_pipeline) { 8846bf215546Sopenharmony_ci scratch_bytes_per_wave = cmd_buffer->state.rt_pipeline->base.scratch_bytes_per_wave; 8847bf215546Sopenharmony_ci wave_size = cmd_buffer->state.rt_pipeline->base.shaders[MESA_SHADER_COMPUTE]->info.wave_size; 8848bf215546Sopenharmony_ci } 8849bf215546Sopenharmony_ci 8850bf215546Sopenharmony_ci /* The hardware register is specified as a multiple of 256 DWORDS. 
*/ 8851bf215546Sopenharmony_ci scratch_bytes_per_wave += align(size * wave_size, 1024); 8852bf215546Sopenharmony_ci 8853bf215546Sopenharmony_ci cmd_buffer->compute_scratch_size_per_wave_needed = 8854bf215546Sopenharmony_ci MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, scratch_bytes_per_wave); 8855bf215546Sopenharmony_ci} 8856bf215546Sopenharmony_ci 8857bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8858bf215546Sopenharmony_ciradv_CmdSetRayTracingPipelineStackSizeKHR(VkCommandBuffer commandBuffer, uint32_t size) 8859bf215546Sopenharmony_ci{ 8860bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8861bf215546Sopenharmony_ci 8862bf215546Sopenharmony_ci radv_set_rt_stack_size(cmd_buffer, size); 8863bf215546Sopenharmony_ci cmd_buffer->state.rt_stack_size = size; 8864bf215546Sopenharmony_ci} 8865bf215546Sopenharmony_ci 8866bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8867bf215546Sopenharmony_ciradv_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo) 8868bf215546Sopenharmony_ci{ 8869bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8870bf215546Sopenharmony_ci 8871bf215546Sopenharmony_ci radv_mark_noncoherent_rb(cmd_buffer); 8872bf215546Sopenharmony_ci 8873bf215546Sopenharmony_ci radv_emit_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier); 8874bf215546Sopenharmony_ci 8875bf215546Sopenharmony_ci radv_cmd_buffer_end_subpass(cmd_buffer); 8876bf215546Sopenharmony_ci 8877bf215546Sopenharmony_ci vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachments); 8878bf215546Sopenharmony_ci vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.subpass_sample_locs); 8879bf215546Sopenharmony_ci 8880bf215546Sopenharmony_ci cmd_buffer->state.pass = NULL; 8881bf215546Sopenharmony_ci cmd_buffer->state.subpass = NULL; 8882bf215546Sopenharmony_ci cmd_buffer->state.attachments = NULL; 8883bf215546Sopenharmony_ci cmd_buffer->state.framebuffer = 
NULL; 8884bf215546Sopenharmony_ci cmd_buffer->state.subpass_sample_locs = NULL; 8885bf215546Sopenharmony_ci} 8886bf215546Sopenharmony_ci 8887bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 8888bf215546Sopenharmony_ciradv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo) 8889bf215546Sopenharmony_ci{ 8890bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 8891bf215546Sopenharmony_ci const VkRenderingFragmentShadingRateAttachmentInfoKHR *vrs_info = vk_find_struct_const( 8892bf215546Sopenharmony_ci pRenderingInfo->pNext, RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR); 8893bf215546Sopenharmony_ci VkResult result; 8894bf215546Sopenharmony_ci /* (normal + resolve) for color attachments and ds and a VRS attachment */ 8895bf215546Sopenharmony_ci VkAttachmentDescription2 att_desc[MAX_RTS * 2 + 3]; 8896bf215546Sopenharmony_ci VkAttachmentDescriptionStencilLayout ds_stencil_att, ds_stencil_resolve_att; 8897bf215546Sopenharmony_ci VkImageView iviews[MAX_RTS * 2 + 3]; 8898bf215546Sopenharmony_ci VkAttachmentReference2 color_refs[MAX_RTS], color_resolve_refs[MAX_RTS]; 8899bf215546Sopenharmony_ci VkAttachmentReference2 ds_ref, ds_resolve_ref, vrs_ref; 8900bf215546Sopenharmony_ci VkAttachmentReferenceStencilLayout ds_stencil_ref, ds_stencil_resolve_ref; 8901bf215546Sopenharmony_ci VkSubpassDescriptionDepthStencilResolve ds_resolve_info; 8902bf215546Sopenharmony_ci VkFragmentShadingRateAttachmentInfoKHR vrs_subpass_info; 8903bf215546Sopenharmony_ci VkClearValue clear_values[MAX_RTS * 2 + 3]; 8904bf215546Sopenharmony_ci unsigned att_count = 0; 8905bf215546Sopenharmony_ci 8906bf215546Sopenharmony_ci VkSubpassDescription2 subpass = { 8907bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2, 8908bf215546Sopenharmony_ci .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, 8909bf215546Sopenharmony_ci .viewMask = pRenderingInfo->viewMask, 8910bf215546Sopenharmony_ci .colorAttachmentCount = 
pRenderingInfo->colorAttachmentCount, 8911bf215546Sopenharmony_ci .pColorAttachments = color_refs, 8912bf215546Sopenharmony_ci .pResolveAttachments = color_resolve_refs, 8913bf215546Sopenharmony_ci }; 8914bf215546Sopenharmony_ci 8915bf215546Sopenharmony_ci for (unsigned i = 0; i < pRenderingInfo->colorAttachmentCount; ++i) { 8916bf215546Sopenharmony_ci color_refs[i] = (VkAttachmentReference2){ 8917bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 8918bf215546Sopenharmony_ci .attachment = VK_ATTACHMENT_UNUSED, 8919bf215546Sopenharmony_ci }; 8920bf215546Sopenharmony_ci color_resolve_refs[i] = (VkAttachmentReference2){ 8921bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 8922bf215546Sopenharmony_ci .attachment = VK_ATTACHMENT_UNUSED, 8923bf215546Sopenharmony_ci }; 8924bf215546Sopenharmony_ci 8925bf215546Sopenharmony_ci if (pRenderingInfo->pColorAttachments[i].imageView == VK_NULL_HANDLE) 8926bf215546Sopenharmony_ci continue; 8927bf215546Sopenharmony_ci 8928bf215546Sopenharmony_ci const VkRenderingAttachmentInfo *info = &pRenderingInfo->pColorAttachments[i]; 8929bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_image_view, iview, info->imageView); 8930bf215546Sopenharmony_ci color_refs[i] = (VkAttachmentReference2){.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 8931bf215546Sopenharmony_ci .attachment = att_count, 8932bf215546Sopenharmony_ci .layout = info->imageLayout, 8933bf215546Sopenharmony_ci .aspectMask = iview->vk.aspects}; 8934bf215546Sopenharmony_ci 8935bf215546Sopenharmony_ci iviews[att_count] = info->imageView; 8936bf215546Sopenharmony_ci clear_values[att_count] = info->clearValue; 8937bf215546Sopenharmony_ci VkAttachmentDescription2 *att = att_desc + att_count++; 8938bf215546Sopenharmony_ci 8939bf215546Sopenharmony_ci memset(att, 0, sizeof(*att)); 8940bf215546Sopenharmony_ci att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; 8941bf215546Sopenharmony_ci att->format = iview->vk.format; 
8942bf215546Sopenharmony_ci att->samples = iview->image->info.samples; 8943bf215546Sopenharmony_ci att->loadOp = info->loadOp; 8944bf215546Sopenharmony_ci att->storeOp = info->storeOp; 8945bf215546Sopenharmony_ci att->initialLayout = info->imageLayout; 8946bf215546Sopenharmony_ci att->finalLayout = info->imageLayout; 8947bf215546Sopenharmony_ci 8948bf215546Sopenharmony_ci if (pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT) 8949bf215546Sopenharmony_ci att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 8950bf215546Sopenharmony_ci 8951bf215546Sopenharmony_ci if (pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT) 8952bf215546Sopenharmony_ci att->storeOp = VK_ATTACHMENT_STORE_OP_STORE; 8953bf215546Sopenharmony_ci 8954bf215546Sopenharmony_ci if (info->resolveMode != VK_RESOLVE_MODE_NONE && 8955bf215546Sopenharmony_ci !(pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT)) { 8956bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_image_view, resolve_iview, info->resolveImageView); 8957bf215546Sopenharmony_ci color_resolve_refs[i] = 8958bf215546Sopenharmony_ci (VkAttachmentReference2){.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 8959bf215546Sopenharmony_ci .attachment = att_count, 8960bf215546Sopenharmony_ci .layout = info->resolveImageLayout, 8961bf215546Sopenharmony_ci .aspectMask = resolve_iview->vk.aspects}; 8962bf215546Sopenharmony_ci 8963bf215546Sopenharmony_ci iviews[att_count] = info->resolveImageView; 8964bf215546Sopenharmony_ci att = att_desc + att_count++; 8965bf215546Sopenharmony_ci 8966bf215546Sopenharmony_ci memset(att, 0, sizeof(*att)); 8967bf215546Sopenharmony_ci att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; 8968bf215546Sopenharmony_ci att->format = resolve_iview->vk.format; 8969bf215546Sopenharmony_ci att->samples = resolve_iview->image->info.samples; 8970bf215546Sopenharmony_ci att->loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; 8971bf215546Sopenharmony_ci att->storeOp = VK_ATTACHMENT_STORE_OP_STORE; 8972bf215546Sopenharmony_ci att->initialLayout = 
info->resolveImageLayout; 8973bf215546Sopenharmony_ci att->finalLayout = info->resolveImageLayout; 8974bf215546Sopenharmony_ci } 8975bf215546Sopenharmony_ci } 8976bf215546Sopenharmony_ci 8977bf215546Sopenharmony_ci if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) { 8978bf215546Sopenharmony_ci const VkRenderingAttachmentInfo *common_info = pRenderingInfo->pDepthAttachment 8979bf215546Sopenharmony_ci ? pRenderingInfo->pDepthAttachment 8980bf215546Sopenharmony_ci : pRenderingInfo->pStencilAttachment; 8981bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_image_view, iview, common_info->imageView); 8982bf215546Sopenharmony_ci 8983bf215546Sopenharmony_ci if (common_info->imageView != VK_NULL_HANDLE) { 8984bf215546Sopenharmony_ci ds_ref = (VkAttachmentReference2){ 8985bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 8986bf215546Sopenharmony_ci .attachment = att_count, 8987bf215546Sopenharmony_ci .layout = common_info->imageLayout, 8988bf215546Sopenharmony_ci .aspectMask = (pRenderingInfo->pDepthAttachment ? VK_IMAGE_ASPECT_DEPTH_BIT : 0) | 8989bf215546Sopenharmony_ci (pRenderingInfo->pStencilAttachment ? 
VK_IMAGE_ASPECT_STENCIL_BIT : 0)}; 8990bf215546Sopenharmony_ci subpass.pDepthStencilAttachment = &ds_ref; 8991bf215546Sopenharmony_ci 8992bf215546Sopenharmony_ci iviews[att_count] = common_info->imageView; 8993bf215546Sopenharmony_ci if (pRenderingInfo->pDepthAttachment) 8994bf215546Sopenharmony_ci clear_values[att_count].depthStencil.depth = 8995bf215546Sopenharmony_ci pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth; 8996bf215546Sopenharmony_ci if (pRenderingInfo->pStencilAttachment) 8997bf215546Sopenharmony_ci clear_values[att_count].depthStencil.stencil = 8998bf215546Sopenharmony_ci pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil; 8999bf215546Sopenharmony_ci VkAttachmentDescription2 *att = att_desc + att_count++; 9000bf215546Sopenharmony_ci 9001bf215546Sopenharmony_ci memset(att, 0, sizeof(*att)); 9002bf215546Sopenharmony_ci att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; 9003bf215546Sopenharmony_ci att->format = iview->vk.format; 9004bf215546Sopenharmony_ci att->samples = iview->image->info.samples; 9005bf215546Sopenharmony_ci 9006bf215546Sopenharmony_ci if (pRenderingInfo->pDepthAttachment) { 9007bf215546Sopenharmony_ci att->loadOp = pRenderingInfo->pDepthAttachment->loadOp; 9008bf215546Sopenharmony_ci att->storeOp = pRenderingInfo->pDepthAttachment->storeOp; 9009bf215546Sopenharmony_ci } else { 9010bf215546Sopenharmony_ci att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 9011bf215546Sopenharmony_ci att->storeOp = VK_ATTACHMENT_STORE_OP_STORE; 9012bf215546Sopenharmony_ci } 9013bf215546Sopenharmony_ci 9014bf215546Sopenharmony_ci if (pRenderingInfo->pStencilAttachment) { 9015bf215546Sopenharmony_ci att->stencilLoadOp = pRenderingInfo->pStencilAttachment->loadOp; 9016bf215546Sopenharmony_ci att->stencilStoreOp = pRenderingInfo->pStencilAttachment->storeOp; 9017bf215546Sopenharmony_ci } else { 9018bf215546Sopenharmony_ci att->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 9019bf215546Sopenharmony_ci att->stencilStoreOp = 
VK_ATTACHMENT_STORE_OP_STORE; 9020bf215546Sopenharmony_ci } 9021bf215546Sopenharmony_ci 9022bf215546Sopenharmony_ci if (pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT) { 9023bf215546Sopenharmony_ci att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 9024bf215546Sopenharmony_ci att->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 9025bf215546Sopenharmony_ci } 9026bf215546Sopenharmony_ci 9027bf215546Sopenharmony_ci if (pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT) { 9028bf215546Sopenharmony_ci att->storeOp = VK_ATTACHMENT_STORE_OP_STORE; 9029bf215546Sopenharmony_ci att->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; 9030bf215546Sopenharmony_ci } 9031bf215546Sopenharmony_ci 9032bf215546Sopenharmony_ci att->initialLayout = common_info->imageLayout; 9033bf215546Sopenharmony_ci att->finalLayout = common_info->imageLayout; 9034bf215546Sopenharmony_ci 9035bf215546Sopenharmony_ci if (pRenderingInfo->pDepthAttachment && pRenderingInfo->pStencilAttachment) { 9036bf215546Sopenharmony_ci ds_ref.pNext = &ds_stencil_ref; 9037bf215546Sopenharmony_ci ds_stencil_ref = (VkAttachmentReferenceStencilLayout){ 9038bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_STENCIL_LAYOUT, 9039bf215546Sopenharmony_ci .stencilLayout = pRenderingInfo->pStencilAttachment->imageLayout}; 9040bf215546Sopenharmony_ci 9041bf215546Sopenharmony_ci att->pNext = &ds_stencil_att; 9042bf215546Sopenharmony_ci ds_stencil_att = (VkAttachmentDescriptionStencilLayout){ 9043bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT, 9044bf215546Sopenharmony_ci .stencilInitialLayout = pRenderingInfo->pStencilAttachment->imageLayout, 9045bf215546Sopenharmony_ci .stencilFinalLayout = pRenderingInfo->pStencilAttachment->imageLayout, 9046bf215546Sopenharmony_ci }; 9047bf215546Sopenharmony_ci } 9048bf215546Sopenharmony_ci 9049bf215546Sopenharmony_ci if (((pRenderingInfo->pDepthAttachment && 9050bf215546Sopenharmony_ci pRenderingInfo->pDepthAttachment->resolveMode != 
VK_RESOLVE_MODE_NONE) || 9051bf215546Sopenharmony_ci (pRenderingInfo->pStencilAttachment && 9052bf215546Sopenharmony_ci pRenderingInfo->pStencilAttachment->resolveMode != VK_RESOLVE_MODE_NONE)) && 9053bf215546Sopenharmony_ci !(pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT)) { 9054bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_image_view, resolve_iview, common_info->resolveImageView); 9055bf215546Sopenharmony_ci ds_resolve_ref = 9056bf215546Sopenharmony_ci (VkAttachmentReference2){.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 9057bf215546Sopenharmony_ci .attachment = att_count, 9058bf215546Sopenharmony_ci .layout = common_info->resolveImageLayout, 9059bf215546Sopenharmony_ci .aspectMask = resolve_iview->vk.aspects}; 9060bf215546Sopenharmony_ci 9061bf215546Sopenharmony_ci iviews[att_count] = common_info->resolveImageView; 9062bf215546Sopenharmony_ci att = att_desc + att_count++; 9063bf215546Sopenharmony_ci 9064bf215546Sopenharmony_ci memset(att, 0, sizeof(*att)); 9065bf215546Sopenharmony_ci att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; 9066bf215546Sopenharmony_ci att->format = resolve_iview->vk.format; 9067bf215546Sopenharmony_ci att->samples = resolve_iview->image->info.samples; 9068bf215546Sopenharmony_ci att->loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; 9069bf215546Sopenharmony_ci att->storeOp = VK_ATTACHMENT_STORE_OP_STORE; 9070bf215546Sopenharmony_ci att->initialLayout = common_info->resolveImageLayout; 9071bf215546Sopenharmony_ci att->finalLayout = common_info->resolveImageLayout; 9072bf215546Sopenharmony_ci 9073bf215546Sopenharmony_ci ds_resolve_info = (VkSubpassDescriptionDepthStencilResolve){ 9074bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE, 9075bf215546Sopenharmony_ci .pNext = subpass.pNext, 9076bf215546Sopenharmony_ci .depthResolveMode = 9077bf215546Sopenharmony_ci (pRenderingInfo->pDepthAttachment && 9078bf215546Sopenharmony_ci pRenderingInfo->pDepthAttachment->resolveMode != 
VK_RESOLVE_MODE_NONE) 9079bf215546Sopenharmony_ci ? pRenderingInfo->pDepthAttachment->resolveMode 9080bf215546Sopenharmony_ci : VK_RESOLVE_MODE_NONE, 9081bf215546Sopenharmony_ci .stencilResolveMode = 9082bf215546Sopenharmony_ci (pRenderingInfo->pStencilAttachment && 9083bf215546Sopenharmony_ci pRenderingInfo->pStencilAttachment->resolveMode != VK_RESOLVE_MODE_NONE) 9084bf215546Sopenharmony_ci ? pRenderingInfo->pStencilAttachment->resolveMode 9085bf215546Sopenharmony_ci : VK_RESOLVE_MODE_NONE, 9086bf215546Sopenharmony_ci .pDepthStencilResolveAttachment = &ds_resolve_ref}; 9087bf215546Sopenharmony_ci subpass.pNext = &ds_resolve_info; 9088bf215546Sopenharmony_ci 9089bf215546Sopenharmony_ci if (pRenderingInfo->pDepthAttachment && pRenderingInfo->pStencilAttachment && 9090bf215546Sopenharmony_ci pRenderingInfo->pDepthAttachment->resolveMode != VK_RESOLVE_MODE_NONE && 9091bf215546Sopenharmony_ci pRenderingInfo->pStencilAttachment->resolveMode != VK_RESOLVE_MODE_NONE) { 9092bf215546Sopenharmony_ci ds_resolve_ref.pNext = &ds_stencil_resolve_ref; 9093bf215546Sopenharmony_ci ds_stencil_resolve_ref = (VkAttachmentReferenceStencilLayout){ 9094bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_STENCIL_LAYOUT, 9095bf215546Sopenharmony_ci .stencilLayout = pRenderingInfo->pStencilAttachment->resolveImageLayout}; 9096bf215546Sopenharmony_ci 9097bf215546Sopenharmony_ci att->pNext = &ds_stencil_resolve_att; 9098bf215546Sopenharmony_ci ds_stencil_resolve_att = (VkAttachmentDescriptionStencilLayout){ 9099bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT, 9100bf215546Sopenharmony_ci .stencilInitialLayout = pRenderingInfo->pStencilAttachment->resolveImageLayout, 9101bf215546Sopenharmony_ci .stencilFinalLayout = pRenderingInfo->pStencilAttachment->resolveImageLayout, 9102bf215546Sopenharmony_ci }; 9103bf215546Sopenharmony_ci } 9104bf215546Sopenharmony_ci } 9105bf215546Sopenharmony_ci } 9106bf215546Sopenharmony_ci } 
9107bf215546Sopenharmony_ci 9108bf215546Sopenharmony_ci if (vrs_info && vrs_info->imageView) { 9109bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_image_view, iview, vrs_info->imageView); 9110bf215546Sopenharmony_ci vrs_ref = (VkAttachmentReference2){.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2, 9111bf215546Sopenharmony_ci .attachment = att_count, 9112bf215546Sopenharmony_ci .layout = vrs_info->imageLayout, 9113bf215546Sopenharmony_ci .aspectMask = iview->vk.aspects}; 9114bf215546Sopenharmony_ci 9115bf215546Sopenharmony_ci iviews[att_count] = vrs_info->imageView; 9116bf215546Sopenharmony_ci VkAttachmentDescription2 *att = att_desc + att_count++; 9117bf215546Sopenharmony_ci 9118bf215546Sopenharmony_ci memset(att, 0, sizeof(*att)); 9119bf215546Sopenharmony_ci att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2; 9120bf215546Sopenharmony_ci att->format = iview->vk.format; 9121bf215546Sopenharmony_ci att->samples = iview->image->info.samples; 9122bf215546Sopenharmony_ci att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; 9123bf215546Sopenharmony_ci att->storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; 9124bf215546Sopenharmony_ci att->initialLayout = vrs_info->imageLayout; 9125bf215546Sopenharmony_ci att->finalLayout = vrs_info->imageLayout; 9126bf215546Sopenharmony_ci 9127bf215546Sopenharmony_ci vrs_subpass_info = (VkFragmentShadingRateAttachmentInfoKHR){ 9128bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR, 9129bf215546Sopenharmony_ci .pNext = subpass.pNext, 9130bf215546Sopenharmony_ci .pFragmentShadingRateAttachment = &vrs_ref, 9131bf215546Sopenharmony_ci .shadingRateAttachmentTexelSize = vrs_info->shadingRateAttachmentTexelSize, 9132bf215546Sopenharmony_ci }; 9133bf215546Sopenharmony_ci subpass.pNext = &vrs_subpass_info; 9134bf215546Sopenharmony_ci } 9135bf215546Sopenharmony_ci 9136bf215546Sopenharmony_ci VkRenderPassCreateInfo2 rp_create_info = { 9137bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2, 
9138bf215546Sopenharmony_ci .attachmentCount = att_count, 9139bf215546Sopenharmony_ci .pAttachments = att_desc, 9140bf215546Sopenharmony_ci .subpassCount = 1, 9141bf215546Sopenharmony_ci .pSubpasses = &subpass, 9142bf215546Sopenharmony_ci }; 9143bf215546Sopenharmony_ci 9144bf215546Sopenharmony_ci VkRenderPass rp; 9145bf215546Sopenharmony_ci result = 9146bf215546Sopenharmony_ci radv_CreateRenderPass2(radv_device_to_handle(cmd_buffer->device), &rp_create_info, NULL, &rp); 9147bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 9148bf215546Sopenharmony_ci cmd_buffer->record_result = result; 9149bf215546Sopenharmony_ci return; 9150bf215546Sopenharmony_ci } 9151bf215546Sopenharmony_ci 9152bf215546Sopenharmony_ci unsigned w = pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width; 9153bf215546Sopenharmony_ci unsigned h = pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height; 9154bf215546Sopenharmony_ci for (unsigned i = 0; i < att_count; ++i) { 9155bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_image_view, iview, iviews[i]); 9156bf215546Sopenharmony_ci 9157bf215546Sopenharmony_ci if (vrs_info && vrs_info->imageView == iviews[i]) 9158bf215546Sopenharmony_ci continue; 9159bf215546Sopenharmony_ci 9160bf215546Sopenharmony_ci w = MIN2(w, iview->extent.width); 9161bf215546Sopenharmony_ci h = MIN2(h, iview->extent.height); 9162bf215546Sopenharmony_ci } 9163bf215546Sopenharmony_ci VkFramebufferCreateInfo fb_create_info = { 9164bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, 9165bf215546Sopenharmony_ci .renderPass = rp, 9166bf215546Sopenharmony_ci .attachmentCount = att_count, 9167bf215546Sopenharmony_ci .pAttachments = iviews, 9168bf215546Sopenharmony_ci .width = w, 9169bf215546Sopenharmony_ci .height = h, 9170bf215546Sopenharmony_ci .layers = pRenderingInfo->layerCount, 9171bf215546Sopenharmony_ci }; 9172bf215546Sopenharmony_ci 9173bf215546Sopenharmony_ci VkFramebuffer fb; 9174bf215546Sopenharmony_ci 
result = 9175bf215546Sopenharmony_ci vk_common_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device), &fb_create_info, NULL, &fb); 9176bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 9177bf215546Sopenharmony_ci radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device), rp, NULL); 9178bf215546Sopenharmony_ci cmd_buffer->record_result = result; 9179bf215546Sopenharmony_ci return; 9180bf215546Sopenharmony_ci } 9181bf215546Sopenharmony_ci 9182bf215546Sopenharmony_ci VkRenderPassBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, 9183bf215546Sopenharmony_ci .renderPass = rp, 9184bf215546Sopenharmony_ci .framebuffer = fb, 9185bf215546Sopenharmony_ci .renderArea = pRenderingInfo->renderArea, 9186bf215546Sopenharmony_ci .clearValueCount = att_count, 9187bf215546Sopenharmony_ci .pClearValues = clear_values}; 9188bf215546Sopenharmony_ci 9189bf215546Sopenharmony_ci const VkSubpassBeginInfo pass_begin_info = { 9190bf215546Sopenharmony_ci .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO, 9191bf215546Sopenharmony_ci .contents = (pRenderingInfo->flags & VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT) 9192bf215546Sopenharmony_ci ? 
VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS 9193bf215546Sopenharmony_ci : VK_SUBPASS_CONTENTS_INLINE, 9194bf215546Sopenharmony_ci }; 9195bf215546Sopenharmony_ci 9196bf215546Sopenharmony_ci radv_CmdBeginRenderPass2(commandBuffer, &begin_info, &pass_begin_info); 9197bf215546Sopenharmony_ci} 9198bf215546Sopenharmony_ci 9199bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 9200bf215546Sopenharmony_ciradv_CmdEndRendering(VkCommandBuffer commandBuffer) 9201bf215546Sopenharmony_ci{ 9202bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 9203bf215546Sopenharmony_ci struct radv_render_pass *pass = cmd_buffer->state.pass; 9204bf215546Sopenharmony_ci struct vk_framebuffer *framebuffer = cmd_buffer->state.framebuffer; 9205bf215546Sopenharmony_ci 9206bf215546Sopenharmony_ci radv_CmdEndRenderPass2(commandBuffer, NULL); 9207bf215546Sopenharmony_ci 9208bf215546Sopenharmony_ci vk_common_DestroyFramebuffer(radv_device_to_handle(cmd_buffer->device), 9209bf215546Sopenharmony_ci vk_framebuffer_to_handle(framebuffer), NULL); 9210bf215546Sopenharmony_ci radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device), 9211bf215546Sopenharmony_ci radv_render_pass_to_handle(pass), NULL); 9212bf215546Sopenharmony_ci} 9213bf215546Sopenharmony_ci 9214bf215546Sopenharmony_ci/* 9215bf215546Sopenharmony_ci * For HTILE we have the following interesting clear words: 9216bf215546Sopenharmony_ci * 0xfffff30f: Uncompressed, full depth range, for depth+stencil HTILE 9217bf215546Sopenharmony_ci * 0xfffc000f: Uncompressed, full depth range, for depth only HTILE. 
9218bf215546Sopenharmony_ci * 0xfffffff0: Clear depth to 1.0 9219bf215546Sopenharmony_ci * 0x00000000: Clear depth to 0.0 9220bf215546Sopenharmony_ci */ 9221bf215546Sopenharmony_cistatic void 9222bf215546Sopenharmony_ciradv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 9223bf215546Sopenharmony_ci const VkImageSubresourceRange *range) 9224bf215546Sopenharmony_ci{ 9225bf215546Sopenharmony_ci struct radv_cmd_state *state = &cmd_buffer->state; 9226bf215546Sopenharmony_ci uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image); 9227bf215546Sopenharmony_ci VkClearDepthStencilValue value = {0}; 9228bf215546Sopenharmony_ci struct radv_barrier_data barrier = {0}; 9229bf215546Sopenharmony_ci 9230bf215546Sopenharmony_ci barrier.layout_transitions.init_mask_ram = 1; 9231bf215546Sopenharmony_ci radv_describe_layout_transition(cmd_buffer, &barrier); 9232bf215546Sopenharmony_ci 9233bf215546Sopenharmony_ci /* Transitioning from LAYOUT_UNDEFINED layout not everyone is consistent 9234bf215546Sopenharmony_ci * in considering previous rendering work for WAW hazards. */ 9235bf215546Sopenharmony_ci state->flush_bits |= 9236bf215546Sopenharmony_ci radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image); 9237bf215546Sopenharmony_ci 9238bf215546Sopenharmony_ci if (image->planes[0].surface.has_stencil && 9239bf215546Sopenharmony_ci !(range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) { 9240bf215546Sopenharmony_ci /* Flush caches before performing a separate aspect initialization because it's a 9241bf215546Sopenharmony_ci * read-modify-write operation. 
9242bf215546Sopenharmony_ci */ 9243bf215546Sopenharmony_ci state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, image); 9244bf215546Sopenharmony_ci } 9245bf215546Sopenharmony_ci 9246bf215546Sopenharmony_ci state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value); 9247bf215546Sopenharmony_ci 9248bf215546Sopenharmony_ci radv_set_ds_clear_metadata(cmd_buffer, image, range, value, range->aspectMask); 9249bf215546Sopenharmony_ci 9250bf215546Sopenharmony_ci if (radv_image_is_tc_compat_htile(image) && (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) { 9251bf215546Sopenharmony_ci /* Initialize the TC-compat metada value to 0 because by 9252bf215546Sopenharmony_ci * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only 9253bf215546Sopenharmony_ci * need have to conditionally update its value when performing 9254bf215546Sopenharmony_ci * a fast depth clear. 9255bf215546Sopenharmony_ci */ 9256bf215546Sopenharmony_ci radv_set_tc_compat_zrange_metadata(cmd_buffer, image, range, 0); 9257bf215546Sopenharmony_ci } 9258bf215546Sopenharmony_ci} 9259bf215546Sopenharmony_ci 9260bf215546Sopenharmony_cistatic void 9261bf215546Sopenharmony_ciradv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 9262bf215546Sopenharmony_ci VkImageLayout src_layout, bool src_render_loop, 9263bf215546Sopenharmony_ci VkImageLayout dst_layout, bool dst_render_loop, 9264bf215546Sopenharmony_ci unsigned src_queue_mask, unsigned dst_queue_mask, 9265bf215546Sopenharmony_ci const VkImageSubresourceRange *range, 9266bf215546Sopenharmony_ci struct radv_sample_locations_state *sample_locs) 9267bf215546Sopenharmony_ci{ 9268bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 9269bf215546Sopenharmony_ci 9270bf215546Sopenharmony_ci if (!radv_htile_enabled(image, range->baseMipLevel)) 9271bf215546Sopenharmony_ci return; 9272bf215546Sopenharmony_ci 9273bf215546Sopenharmony_ci if (src_layout == 
VK_IMAGE_LAYOUT_UNDEFINED) { 9274bf215546Sopenharmony_ci radv_initialize_htile(cmd_buffer, image, range); 9275bf215546Sopenharmony_ci } else if (!radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, 9276bf215546Sopenharmony_ci src_queue_mask) && 9277bf215546Sopenharmony_ci radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, 9278bf215546Sopenharmony_ci dst_queue_mask)) { 9279bf215546Sopenharmony_ci radv_initialize_htile(cmd_buffer, image, range); 9280bf215546Sopenharmony_ci } else if (radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop, 9281bf215546Sopenharmony_ci src_queue_mask) && 9282bf215546Sopenharmony_ci !radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop, 9283bf215546Sopenharmony_ci dst_queue_mask)) { 9284bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= 9285bf215546Sopenharmony_ci RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 9286bf215546Sopenharmony_ci 9287bf215546Sopenharmony_ci radv_expand_depth_stencil(cmd_buffer, image, range, sample_locs); 9288bf215546Sopenharmony_ci 9289bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= 9290bf215546Sopenharmony_ci RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META; 9291bf215546Sopenharmony_ci } 9292bf215546Sopenharmony_ci} 9293bf215546Sopenharmony_ci 9294bf215546Sopenharmony_cistatic uint32_t 9295bf215546Sopenharmony_ciradv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 9296bf215546Sopenharmony_ci const VkImageSubresourceRange *range, uint32_t value) 9297bf215546Sopenharmony_ci{ 9298bf215546Sopenharmony_ci struct radv_barrier_data barrier = {0}; 9299bf215546Sopenharmony_ci 9300bf215546Sopenharmony_ci barrier.layout_transitions.init_mask_ram = 1; 9301bf215546Sopenharmony_ci radv_describe_layout_transition(cmd_buffer, &barrier); 9302bf215546Sopenharmony_ci 9303bf215546Sopenharmony_ci return radv_clear_cmask(cmd_buffer, image, range, value); 
9304bf215546Sopenharmony_ci} 9305bf215546Sopenharmony_ci 9306bf215546Sopenharmony_ciuint32_t 9307bf215546Sopenharmony_ciradv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 9308bf215546Sopenharmony_ci const VkImageSubresourceRange *range) 9309bf215546Sopenharmony_ci{ 9310bf215546Sopenharmony_ci static const uint32_t fmask_clear_values[4] = {0x00000000, 0x02020202, 0xE4E4E4E4, 0x76543210}; 9311bf215546Sopenharmony_ci uint32_t log2_samples = util_logbase2(image->info.samples); 9312bf215546Sopenharmony_ci uint32_t value = fmask_clear_values[log2_samples]; 9313bf215546Sopenharmony_ci struct radv_barrier_data barrier = {0}; 9314bf215546Sopenharmony_ci 9315bf215546Sopenharmony_ci barrier.layout_transitions.init_mask_ram = 1; 9316bf215546Sopenharmony_ci radv_describe_layout_transition(cmd_buffer, &barrier); 9317bf215546Sopenharmony_ci 9318bf215546Sopenharmony_ci return radv_clear_fmask(cmd_buffer, image, range, value); 9319bf215546Sopenharmony_ci} 9320bf215546Sopenharmony_ci 9321bf215546Sopenharmony_ciuint32_t 9322bf215546Sopenharmony_ciradv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 9323bf215546Sopenharmony_ci const VkImageSubresourceRange *range, uint32_t value) 9324bf215546Sopenharmony_ci{ 9325bf215546Sopenharmony_ci struct radv_barrier_data barrier = {0}; 9326bf215546Sopenharmony_ci uint32_t flush_bits = 0; 9327bf215546Sopenharmony_ci unsigned size = 0; 9328bf215546Sopenharmony_ci 9329bf215546Sopenharmony_ci barrier.layout_transitions.init_mask_ram = 1; 9330bf215546Sopenharmony_ci radv_describe_layout_transition(cmd_buffer, &barrier); 9331bf215546Sopenharmony_ci 9332bf215546Sopenharmony_ci flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value); 9333bf215546Sopenharmony_ci 9334bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX8) { 9335bf215546Sopenharmony_ci /* When DCC is enabled with mipmaps, some levels might not 9336bf215546Sopenharmony_ci * support fast clears and 
we have to initialize them as "fully 9337bf215546Sopenharmony_ci * expanded". 9338bf215546Sopenharmony_ci */ 9339bf215546Sopenharmony_ci /* Compute the size of all fast clearable DCC levels. */ 9340bf215546Sopenharmony_ci for (unsigned i = 0; i < image->planes[0].surface.num_meta_levels; i++) { 9341bf215546Sopenharmony_ci struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[i]; 9342bf215546Sopenharmony_ci unsigned dcc_fast_clear_size = 9343bf215546Sopenharmony_ci dcc_level->dcc_slice_fast_clear_size * image->info.array_size; 9344bf215546Sopenharmony_ci 9345bf215546Sopenharmony_ci if (!dcc_fast_clear_size) 9346bf215546Sopenharmony_ci break; 9347bf215546Sopenharmony_ci 9348bf215546Sopenharmony_ci size = dcc_level->dcc_offset + dcc_fast_clear_size; 9349bf215546Sopenharmony_ci } 9350bf215546Sopenharmony_ci 9351bf215546Sopenharmony_ci /* Initialize the mipmap levels without DCC. */ 9352bf215546Sopenharmony_ci if (size != image->planes[0].surface.meta_size) { 9353bf215546Sopenharmony_ci flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, 9354bf215546Sopenharmony_ci radv_buffer_get_va(image->bindings[0].bo) + 9355bf215546Sopenharmony_ci image->bindings[0].offset + 9356bf215546Sopenharmony_ci image->planes[0].surface.meta_offset + size, 9357bf215546Sopenharmony_ci image->planes[0].surface.meta_size - size, 0xffffffff); 9358bf215546Sopenharmony_ci } 9359bf215546Sopenharmony_ci } 9360bf215546Sopenharmony_ci 9361bf215546Sopenharmony_ci return flush_bits; 9362bf215546Sopenharmony_ci} 9363bf215546Sopenharmony_ci 9364bf215546Sopenharmony_ci/** 9365bf215546Sopenharmony_ci * Initialize DCC/FMASK/CMASK metadata for a color image. 
9366bf215546Sopenharmony_ci */ 9367bf215546Sopenharmony_cistatic void 9368bf215546Sopenharmony_ciradv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 9369bf215546Sopenharmony_ci VkImageLayout src_layout, bool src_render_loop, 9370bf215546Sopenharmony_ci VkImageLayout dst_layout, bool dst_render_loop, 9371bf215546Sopenharmony_ci unsigned src_queue_mask, unsigned dst_queue_mask, 9372bf215546Sopenharmony_ci const VkImageSubresourceRange *range) 9373bf215546Sopenharmony_ci{ 9374bf215546Sopenharmony_ci uint32_t flush_bits = 0; 9375bf215546Sopenharmony_ci 9376bf215546Sopenharmony_ci /* Transitioning from LAYOUT_UNDEFINED layout not everyone is 9377bf215546Sopenharmony_ci * consistent in considering previous rendering work for WAW hazards. 9378bf215546Sopenharmony_ci */ 9379bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= 9380bf215546Sopenharmony_ci radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image); 9381bf215546Sopenharmony_ci 9382bf215546Sopenharmony_ci if (radv_image_has_cmask(image)) { 9383bf215546Sopenharmony_ci uint32_t value; 9384bf215546Sopenharmony_ci 9385bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) { 9386bf215546Sopenharmony_ci /* TODO: Fix clearing CMASK layers on GFX9. 
*/ 9387bf215546Sopenharmony_ci if (radv_image_is_tc_compat_cmask(image) || 9388bf215546Sopenharmony_ci (radv_image_has_fmask(image) && 9389bf215546Sopenharmony_ci radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout, 9390bf215546Sopenharmony_ci dst_render_loop, dst_queue_mask))) { 9391bf215546Sopenharmony_ci value = 0xccccccccu; 9392bf215546Sopenharmony_ci } else { 9393bf215546Sopenharmony_ci value = 0xffffffffu; 9394bf215546Sopenharmony_ci } 9395bf215546Sopenharmony_ci } else { 9396bf215546Sopenharmony_ci static const uint32_t cmask_clear_values[4] = {0xffffffff, 0xdddddddd, 0xeeeeeeee, 0xffffffff}; 9397bf215546Sopenharmony_ci uint32_t log2_samples = util_logbase2(image->info.samples); 9398bf215546Sopenharmony_ci 9399bf215546Sopenharmony_ci value = cmask_clear_values[log2_samples]; 9400bf215546Sopenharmony_ci } 9401bf215546Sopenharmony_ci 9402bf215546Sopenharmony_ci flush_bits |= radv_init_cmask(cmd_buffer, image, range, value); 9403bf215546Sopenharmony_ci } 9404bf215546Sopenharmony_ci 9405bf215546Sopenharmony_ci if (radv_image_has_fmask(image)) { 9406bf215546Sopenharmony_ci flush_bits |= radv_init_fmask(cmd_buffer, image, range); 9407bf215546Sopenharmony_ci } 9408bf215546Sopenharmony_ci 9409bf215546Sopenharmony_ci if (radv_dcc_enabled(image, range->baseMipLevel)) { 9410bf215546Sopenharmony_ci uint32_t value = 0xffffffffu; /* Fully expanded mode. 
*/ 9411bf215546Sopenharmony_ci 9412bf215546Sopenharmony_ci if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, 9413bf215546Sopenharmony_ci dst_layout, dst_render_loop, dst_queue_mask)) { 9414bf215546Sopenharmony_ci value = 0u; 9415bf215546Sopenharmony_ci } 9416bf215546Sopenharmony_ci 9417bf215546Sopenharmony_ci flush_bits |= radv_init_dcc(cmd_buffer, image, range, value); 9418bf215546Sopenharmony_ci } 9419bf215546Sopenharmony_ci 9420bf215546Sopenharmony_ci if (radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel)) { 9421bf215546Sopenharmony_ci radv_update_fce_metadata(cmd_buffer, image, range, false); 9422bf215546Sopenharmony_ci 9423bf215546Sopenharmony_ci uint32_t color_values[2] = {0}; 9424bf215546Sopenharmony_ci radv_set_color_clear_metadata(cmd_buffer, image, range, color_values); 9425bf215546Sopenharmony_ci } 9426bf215546Sopenharmony_ci 9427bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= flush_bits; 9428bf215546Sopenharmony_ci} 9429bf215546Sopenharmony_ci 9430bf215546Sopenharmony_cistatic void 9431bf215546Sopenharmony_ciradv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 9432bf215546Sopenharmony_ci VkImageLayout src_layout, VkImageLayout dst_layout, unsigned dst_queue_mask) 9433bf215546Sopenharmony_ci{ 9434bf215546Sopenharmony_ci /* If the image is read-only, we don't have to retile DCC because it can't change. 
*/ 9435bf215546Sopenharmony_ci if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS)) 9436bf215546Sopenharmony_ci return; 9437bf215546Sopenharmony_ci 9438bf215546Sopenharmony_ci if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR && 9439bf215546Sopenharmony_ci (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR || 9440bf215546Sopenharmony_ci (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN)))) 9441bf215546Sopenharmony_ci radv_retile_dcc(cmd_buffer, image); 9442bf215546Sopenharmony_ci} 9443bf215546Sopenharmony_ci 9444bf215546Sopenharmony_cistatic bool 9445bf215546Sopenharmony_ciradv_image_need_retile(const struct radv_image *image) 9446bf215546Sopenharmony_ci{ 9447bf215546Sopenharmony_ci return image->planes[0].surface.display_dcc_offset && 9448bf215546Sopenharmony_ci image->planes[0].surface.display_dcc_offset != image->planes[0].surface.meta_offset; 9449bf215546Sopenharmony_ci} 9450bf215546Sopenharmony_ci 9451bf215546Sopenharmony_ci/** 9452bf215546Sopenharmony_ci * Handle color image transitions for DCC/FMASK/CMASK. 
9453bf215546Sopenharmony_ci */ 9454bf215546Sopenharmony_cistatic void 9455bf215546Sopenharmony_ciradv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 9456bf215546Sopenharmony_ci VkImageLayout src_layout, bool src_render_loop, 9457bf215546Sopenharmony_ci VkImageLayout dst_layout, bool dst_render_loop, 9458bf215546Sopenharmony_ci unsigned src_queue_mask, unsigned dst_queue_mask, 9459bf215546Sopenharmony_ci const VkImageSubresourceRange *range) 9460bf215546Sopenharmony_ci{ 9461bf215546Sopenharmony_ci bool dcc_decompressed = false, fast_clear_flushed = false; 9462bf215546Sopenharmony_ci 9463bf215546Sopenharmony_ci if (!radv_image_has_cmask(image) && !radv_image_has_fmask(image) && 9464bf215546Sopenharmony_ci !radv_dcc_enabled(image, range->baseMipLevel)) 9465bf215546Sopenharmony_ci return; 9466bf215546Sopenharmony_ci 9467bf215546Sopenharmony_ci if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) { 9468bf215546Sopenharmony_ci radv_init_color_image_metadata(cmd_buffer, image, src_layout, src_render_loop, dst_layout, 9469bf215546Sopenharmony_ci dst_render_loop, src_queue_mask, dst_queue_mask, range); 9470bf215546Sopenharmony_ci 9471bf215546Sopenharmony_ci if (radv_image_need_retile(image)) 9472bf215546Sopenharmony_ci radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask); 9473bf215546Sopenharmony_ci return; 9474bf215546Sopenharmony_ci } 9475bf215546Sopenharmony_ci 9476bf215546Sopenharmony_ci if (radv_dcc_enabled(image, range->baseMipLevel)) { 9477bf215546Sopenharmony_ci if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) { 9478bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, range, 0xffffffffu); 9479bf215546Sopenharmony_ci } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, 9480bf215546Sopenharmony_ci src_layout, src_render_loop, src_queue_mask) && 9481bf215546Sopenharmony_ci !radv_layout_dcc_compressed(cmd_buffer->device, image, 
range->baseMipLevel, 9482bf215546Sopenharmony_ci dst_layout, dst_render_loop, dst_queue_mask)) { 9483bf215546Sopenharmony_ci radv_decompress_dcc(cmd_buffer, image, range); 9484bf215546Sopenharmony_ci dcc_decompressed = true; 9485bf215546Sopenharmony_ci } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, 9486bf215546Sopenharmony_ci src_layout, src_render_loop, src_queue_mask) && 9487bf215546Sopenharmony_ci !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, 9488bf215546Sopenharmony_ci dst_layout, dst_render_loop, dst_queue_mask)) { 9489bf215546Sopenharmony_ci radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); 9490bf215546Sopenharmony_ci fast_clear_flushed = true; 9491bf215546Sopenharmony_ci } 9492bf215546Sopenharmony_ci 9493bf215546Sopenharmony_ci if (radv_image_need_retile(image)) 9494bf215546Sopenharmony_ci radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask); 9495bf215546Sopenharmony_ci } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) { 9496bf215546Sopenharmony_ci if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, 9497bf215546Sopenharmony_ci src_layout, src_render_loop, src_queue_mask) && 9498bf215546Sopenharmony_ci !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, 9499bf215546Sopenharmony_ci dst_layout, dst_render_loop, dst_queue_mask)) { 9500bf215546Sopenharmony_ci radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); 9501bf215546Sopenharmony_ci fast_clear_flushed = true; 9502bf215546Sopenharmony_ci } 9503bf215546Sopenharmony_ci } 9504bf215546Sopenharmony_ci 9505bf215546Sopenharmony_ci /* MSAA color decompress. 
*/ 9506bf215546Sopenharmony_ci if (radv_image_has_fmask(image) && 9507bf215546Sopenharmony_ci (image->vk.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) && 9508bf215546Sopenharmony_ci radv_layout_fmask_compressed(cmd_buffer->device, image, src_layout, src_queue_mask) && 9509bf215546Sopenharmony_ci !radv_layout_fmask_compressed(cmd_buffer->device, image, dst_layout, dst_queue_mask)) { 9510bf215546Sopenharmony_ci if (radv_dcc_enabled(image, range->baseMipLevel) && 9511bf215546Sopenharmony_ci !radv_image_use_dcc_image_stores(cmd_buffer->device, image) && !dcc_decompressed) { 9512bf215546Sopenharmony_ci /* A DCC decompress is required before expanding FMASK 9513bf215546Sopenharmony_ci * when DCC stores aren't supported to avoid being in 9514bf215546Sopenharmony_ci * a state where DCC is compressed and the main 9515bf215546Sopenharmony_ci * surface is uncompressed. 9516bf215546Sopenharmony_ci */ 9517bf215546Sopenharmony_ci radv_decompress_dcc(cmd_buffer, image, range); 9518bf215546Sopenharmony_ci } else if (!fast_clear_flushed) { 9519bf215546Sopenharmony_ci /* A FMASK decompress is required before expanding 9520bf215546Sopenharmony_ci * FMASK. 
9521bf215546Sopenharmony_ci */ 9522bf215546Sopenharmony_ci radv_fast_clear_flush_image_inplace(cmd_buffer, image, range); 9523bf215546Sopenharmony_ci } 9524bf215546Sopenharmony_ci 9525bf215546Sopenharmony_ci struct radv_barrier_data barrier = {0}; 9526bf215546Sopenharmony_ci barrier.layout_transitions.fmask_color_expand = 1; 9527bf215546Sopenharmony_ci radv_describe_layout_transition(cmd_buffer, &barrier); 9528bf215546Sopenharmony_ci 9529bf215546Sopenharmony_ci radv_expand_fmask_image_inplace(cmd_buffer, image, range); 9530bf215546Sopenharmony_ci } 9531bf215546Sopenharmony_ci} 9532bf215546Sopenharmony_ci 9533bf215546Sopenharmony_cistatic void 9534bf215546Sopenharmony_ciradv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 9535bf215546Sopenharmony_ci VkImageLayout src_layout, bool src_render_loop, 9536bf215546Sopenharmony_ci VkImageLayout dst_layout, bool dst_render_loop, uint32_t src_family_index, 9537bf215546Sopenharmony_ci uint32_t dst_family_index, const VkImageSubresourceRange *range, 9538bf215546Sopenharmony_ci struct radv_sample_locations_state *sample_locs) 9539bf215546Sopenharmony_ci{ 9540bf215546Sopenharmony_ci enum radv_queue_family src_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, src_family_index); 9541bf215546Sopenharmony_ci enum radv_queue_family dst_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, dst_family_index); 9542bf215546Sopenharmony_ci if (image->exclusive && src_family_index != dst_family_index) { 9543bf215546Sopenharmony_ci /* This is an acquire or a release operation and there will be 9544bf215546Sopenharmony_ci * a corresponding release/acquire. Do the transition in the 9545bf215546Sopenharmony_ci * most flexible queue. 
*/ 9546bf215546Sopenharmony_ci 9547bf215546Sopenharmony_ci assert(src_qf == cmd_buffer->qf || 9548bf215546Sopenharmony_ci dst_qf == cmd_buffer->qf); 9549bf215546Sopenharmony_ci 9550bf215546Sopenharmony_ci if (src_family_index == VK_QUEUE_FAMILY_EXTERNAL || src_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT) 9551bf215546Sopenharmony_ci return; 9552bf215546Sopenharmony_ci 9553bf215546Sopenharmony_ci if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) 9554bf215546Sopenharmony_ci return; 9555bf215546Sopenharmony_ci 9556bf215546Sopenharmony_ci if (cmd_buffer->qf == RADV_QUEUE_COMPUTE && 9557bf215546Sopenharmony_ci (src_qf == RADV_QUEUE_GENERAL || dst_qf == RADV_QUEUE_GENERAL)) 9558bf215546Sopenharmony_ci return; 9559bf215546Sopenharmony_ci } 9560bf215546Sopenharmony_ci 9561bf215546Sopenharmony_ci unsigned src_queue_mask = 9562bf215546Sopenharmony_ci radv_image_queue_family_mask(image, src_qf, cmd_buffer->qf); 9563bf215546Sopenharmony_ci unsigned dst_queue_mask = 9564bf215546Sopenharmony_ci radv_image_queue_family_mask(image, dst_qf, cmd_buffer->qf); 9565bf215546Sopenharmony_ci 9566bf215546Sopenharmony_ci if (src_layout == dst_layout && src_render_loop == dst_render_loop && src_queue_mask == dst_queue_mask) 9567bf215546Sopenharmony_ci return; 9568bf215546Sopenharmony_ci 9569bf215546Sopenharmony_ci if (vk_format_has_depth(image->vk.format)) { 9570bf215546Sopenharmony_ci radv_handle_depth_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout, 9571bf215546Sopenharmony_ci dst_render_loop, src_queue_mask, dst_queue_mask, range, 9572bf215546Sopenharmony_ci sample_locs); 9573bf215546Sopenharmony_ci } else { 9574bf215546Sopenharmony_ci radv_handle_color_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout, 9575bf215546Sopenharmony_ci dst_render_loop, src_queue_mask, dst_queue_mask, range); 9576bf215546Sopenharmony_ci } 9577bf215546Sopenharmony_ci} 9578bf215546Sopenharmony_ci 9579bf215546Sopenharmony_cistatic void 
9580bf215546Sopenharmony_ciradv_cp_dma_wait_for_stages(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 stage_mask) 9581bf215546Sopenharmony_ci{ 9582bf215546Sopenharmony_ci /* Make sure CP DMA is idle because the driver might have performed a DMA operation for copying a 9583bf215546Sopenharmony_ci * buffer (or a MSAA image using FMASK). Note that updating a buffer is considered a clear 9584bf215546Sopenharmony_ci * operation but it might also use a CP DMA copy in some rare situations. Other operations using 9585bf215546Sopenharmony_ci * a CP DMA clear are implicitly synchronized (see CP_DMA_SYNC). 9586bf215546Sopenharmony_ci */ 9587bf215546Sopenharmony_ci if (stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT | 9588bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | 9589bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) 9590bf215546Sopenharmony_ci si_cp_dma_wait_for_idle(cmd_buffer); 9591bf215546Sopenharmony_ci} 9592bf215546Sopenharmony_ci 9593bf215546Sopenharmony_cistatic void 9594bf215546Sopenharmony_ciradv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_info, 9595bf215546Sopenharmony_ci enum rgp_barrier_reason reason) 9596bf215546Sopenharmony_ci{ 9597bf215546Sopenharmony_ci enum radv_cmd_flush_bits src_flush_bits = 0; 9598bf215546Sopenharmony_ci enum radv_cmd_flush_bits dst_flush_bits = 0; 9599bf215546Sopenharmony_ci VkPipelineStageFlags2 src_stage_mask = 0; 9600bf215546Sopenharmony_ci VkPipelineStageFlags2 dst_stage_mask = 0; 9601bf215546Sopenharmony_ci 9602bf215546Sopenharmony_ci if (cmd_buffer->state.subpass) 9603bf215546Sopenharmony_ci radv_mark_noncoherent_rb(cmd_buffer); 9604bf215546Sopenharmony_ci 9605bf215546Sopenharmony_ci radv_describe_barrier_start(cmd_buffer, reason); 9606bf215546Sopenharmony_ci 9607bf215546Sopenharmony_ci for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) { 9608bf215546Sopenharmony_ci src_stage_mask |= 
dep_info->pMemoryBarriers[i].srcStageMask; 9609bf215546Sopenharmony_ci src_flush_bits |= 9610bf215546Sopenharmony_ci radv_src_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].srcAccessMask, NULL); 9611bf215546Sopenharmony_ci dst_stage_mask |= dep_info->pMemoryBarriers[i].dstStageMask; 9612bf215546Sopenharmony_ci dst_flush_bits |= 9613bf215546Sopenharmony_ci radv_dst_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].dstAccessMask, NULL); 9614bf215546Sopenharmony_ci } 9615bf215546Sopenharmony_ci 9616bf215546Sopenharmony_ci for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) { 9617bf215546Sopenharmony_ci src_stage_mask |= dep_info->pBufferMemoryBarriers[i].srcStageMask; 9618bf215546Sopenharmony_ci src_flush_bits |= 9619bf215546Sopenharmony_ci radv_src_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].srcAccessMask, NULL); 9620bf215546Sopenharmony_ci dst_stage_mask |= dep_info->pBufferMemoryBarriers[i].dstStageMask; 9621bf215546Sopenharmony_ci dst_flush_bits |= 9622bf215546Sopenharmony_ci radv_dst_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].dstAccessMask, NULL); 9623bf215546Sopenharmony_ci } 9624bf215546Sopenharmony_ci 9625bf215546Sopenharmony_ci for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) { 9626bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_image, image, dep_info->pImageMemoryBarriers[i].image); 9627bf215546Sopenharmony_ci 9628bf215546Sopenharmony_ci src_stage_mask |= dep_info->pImageMemoryBarriers[i].srcStageMask; 9629bf215546Sopenharmony_ci src_flush_bits |= 9630bf215546Sopenharmony_ci radv_src_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].srcAccessMask, image); 9631bf215546Sopenharmony_ci dst_stage_mask |= dep_info->pImageMemoryBarriers[i].dstStageMask; 9632bf215546Sopenharmony_ci dst_flush_bits |= 9633bf215546Sopenharmony_ci radv_dst_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].dstAccessMask, image); 9634bf215546Sopenharmony_ci } 9635bf215546Sopenharmony_ci 
9636bf215546Sopenharmony_ci /* The Vulkan spec 1.1.98 says: 9637bf215546Sopenharmony_ci * 9638bf215546Sopenharmony_ci * "An execution dependency with only 9639bf215546Sopenharmony_ci * VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT in the destination stage mask 9640bf215546Sopenharmony_ci * will only prevent that stage from executing in subsequently 9641bf215546Sopenharmony_ci * submitted commands. As this stage does not perform any actual 9642bf215546Sopenharmony_ci * execution, this is not observable - in effect, it does not delay 9643bf215546Sopenharmony_ci * processing of subsequent commands. Similarly an execution dependency 9644bf215546Sopenharmony_ci * with only VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT in the source stage mask 9645bf215546Sopenharmony_ci * will effectively not wait for any prior commands to complete." 9646bf215546Sopenharmony_ci */ 9647bf215546Sopenharmony_ci if (dst_stage_mask != VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT) 9648bf215546Sopenharmony_ci radv_stage_flush(cmd_buffer, src_stage_mask); 9649bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= src_flush_bits; 9650bf215546Sopenharmony_ci 9651bf215546Sopenharmony_ci radv_ace_internal_barrier(cmd_buffer, src_stage_mask, 0); 9652bf215546Sopenharmony_ci 9653bf215546Sopenharmony_ci for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) { 9654bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_image, image, dep_info->pImageMemoryBarriers[i].image); 9655bf215546Sopenharmony_ci 9656bf215546Sopenharmony_ci const struct VkSampleLocationsInfoEXT *sample_locs_info = 9657bf215546Sopenharmony_ci vk_find_struct_const(dep_info->pImageMemoryBarriers[i].pNext, SAMPLE_LOCATIONS_INFO_EXT); 9658bf215546Sopenharmony_ci struct radv_sample_locations_state sample_locations; 9659bf215546Sopenharmony_ci 9660bf215546Sopenharmony_ci if (sample_locs_info) { 9661bf215546Sopenharmony_ci assert(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT); 9662bf215546Sopenharmony_ci 
sample_locations.per_pixel = sample_locs_info->sampleLocationsPerPixel; 9663bf215546Sopenharmony_ci sample_locations.grid_size = sample_locs_info->sampleLocationGridSize; 9664bf215546Sopenharmony_ci sample_locations.count = sample_locs_info->sampleLocationsCount; 9665bf215546Sopenharmony_ci typed_memcpy(&sample_locations.locations[0], sample_locs_info->pSampleLocations, 9666bf215546Sopenharmony_ci sample_locs_info->sampleLocationsCount); 9667bf215546Sopenharmony_ci } 9668bf215546Sopenharmony_ci 9669bf215546Sopenharmony_ci radv_handle_image_transition( 9670bf215546Sopenharmony_ci cmd_buffer, image, dep_info->pImageMemoryBarriers[i].oldLayout, 9671bf215546Sopenharmony_ci false, /* Outside of a renderpass we are never in a renderloop */ 9672bf215546Sopenharmony_ci dep_info->pImageMemoryBarriers[i].newLayout, 9673bf215546Sopenharmony_ci false, /* Outside of a renderpass we are never in a renderloop */ 9674bf215546Sopenharmony_ci dep_info->pImageMemoryBarriers[i].srcQueueFamilyIndex, 9675bf215546Sopenharmony_ci dep_info->pImageMemoryBarriers[i].dstQueueFamilyIndex, 9676bf215546Sopenharmony_ci &dep_info->pImageMemoryBarriers[i].subresourceRange, sample_locs_info ? 
&sample_locations : NULL); 9677bf215546Sopenharmony_ci } 9678bf215546Sopenharmony_ci 9679bf215546Sopenharmony_ci radv_ace_internal_barrier(cmd_buffer, 0, dst_stage_mask); 9680bf215546Sopenharmony_ci radv_cp_dma_wait_for_stages(cmd_buffer, src_stage_mask); 9681bf215546Sopenharmony_ci 9682bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= dst_flush_bits; 9683bf215546Sopenharmony_ci 9684bf215546Sopenharmony_ci radv_describe_barrier_end(cmd_buffer); 9685bf215546Sopenharmony_ci} 9686bf215546Sopenharmony_ci 9687bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 9688bf215546Sopenharmony_ciradv_CmdPipelineBarrier2(VkCommandBuffer commandBuffer, 9689bf215546Sopenharmony_ci const VkDependencyInfo *pDependencyInfo) 9690bf215546Sopenharmony_ci{ 9691bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 9692bf215546Sopenharmony_ci 9693bf215546Sopenharmony_ci radv_barrier(cmd_buffer, pDependencyInfo, RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER); 9694bf215546Sopenharmony_ci} 9695bf215546Sopenharmony_ci 9696bf215546Sopenharmony_cistatic void 9697bf215546Sopenharmony_ciwrite_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event, 9698bf215546Sopenharmony_ci VkPipelineStageFlags2 stageMask, unsigned value) 9699bf215546Sopenharmony_ci{ 9700bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 9701bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(event->bo); 9702bf215546Sopenharmony_ci 9703bf215546Sopenharmony_ci si_emit_cache_flush(cmd_buffer); 9704bf215546Sopenharmony_ci 9705bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); 9706bf215546Sopenharmony_ci 9707bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28); 9708bf215546Sopenharmony_ci 9709bf215546Sopenharmony_ci if (stageMask & (VK_PIPELINE_STAGE_2_COPY_BIT | 9710bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_RESOLVE_BIT | 9711bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_BLIT_BIT | 
9712bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_CLEAR_BIT)) { 9713bf215546Sopenharmony_ci /* Be conservative for now. */ 9714bf215546Sopenharmony_ci stageMask |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT; 9715bf215546Sopenharmony_ci } 9716bf215546Sopenharmony_ci 9717bf215546Sopenharmony_ci /* Flags that only require a top-of-pipe event. */ 9718bf215546Sopenharmony_ci VkPipelineStageFlags2 top_of_pipe_flags = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT; 9719bf215546Sopenharmony_ci 9720bf215546Sopenharmony_ci /* Flags that only require a post-index-fetch event. */ 9721bf215546Sopenharmony_ci VkPipelineStageFlags2 post_index_fetch_flags = 9722bf215546Sopenharmony_ci top_of_pipe_flags | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT; 9723bf215546Sopenharmony_ci 9724bf215546Sopenharmony_ci /* Flags that only require signaling post PS. */ 9725bf215546Sopenharmony_ci VkPipelineStageFlags2 post_ps_flags = 9726bf215546Sopenharmony_ci post_index_fetch_flags | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT | 9727bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT | 9728bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT | 9729bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_NV | 9730bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT | 9731bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT | 9732bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR | 9733bf215546Sopenharmony_ci VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT; 9734bf215546Sopenharmony_ci 9735bf215546Sopenharmony_ci /* Flags that only require signaling post CS. 
*/ 9736bf215546Sopenharmony_ci VkPipelineStageFlags2 post_cs_flags = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT; 9737bf215546Sopenharmony_ci 9738bf215546Sopenharmony_ci radv_cp_dma_wait_for_stages(cmd_buffer, stageMask); 9739bf215546Sopenharmony_ci 9740bf215546Sopenharmony_ci if (!(stageMask & ~top_of_pipe_flags)) { 9741bf215546Sopenharmony_ci /* Just need to sync the PFP engine. */ 9742bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); 9743bf215546Sopenharmony_ci radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP)); 9744bf215546Sopenharmony_ci radeon_emit(cs, va); 9745bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 9746bf215546Sopenharmony_ci radeon_emit(cs, value); 9747bf215546Sopenharmony_ci } else if (!(stageMask & ~post_index_fetch_flags)) { 9748bf215546Sopenharmony_ci /* Sync ME because PFP reads index and indirect buffers. */ 9749bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); 9750bf215546Sopenharmony_ci radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME)); 9751bf215546Sopenharmony_ci radeon_emit(cs, va); 9752bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 9753bf215546Sopenharmony_ci radeon_emit(cs, value); 9754bf215546Sopenharmony_ci } else { 9755bf215546Sopenharmony_ci unsigned event_type; 9756bf215546Sopenharmony_ci 9757bf215546Sopenharmony_ci if (!(stageMask & ~post_ps_flags)) { 9758bf215546Sopenharmony_ci /* Sync previous fragment shaders. */ 9759bf215546Sopenharmony_ci event_type = V_028A90_PS_DONE; 9760bf215546Sopenharmony_ci } else if (!(stageMask & ~post_cs_flags)) { 9761bf215546Sopenharmony_ci /* Sync previous compute shaders. */ 9762bf215546Sopenharmony_ci event_type = V_028A90_CS_DONE; 9763bf215546Sopenharmony_ci } else { 9764bf215546Sopenharmony_ci /* Otherwise, sync all prior GPU work. 
*/ 9765bf215546Sopenharmony_ci event_type = V_028A90_BOTTOM_OF_PIPE_TS; 9766bf215546Sopenharmony_ci } 9767bf215546Sopenharmony_ci 9768bf215546Sopenharmony_ci si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, 9769bf215546Sopenharmony_ci radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0, 9770bf215546Sopenharmony_ci EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value, 9771bf215546Sopenharmony_ci cmd_buffer->gfx9_eop_bug_va); 9772bf215546Sopenharmony_ci } 9773bf215546Sopenharmony_ci 9774bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 9775bf215546Sopenharmony_ci} 9776bf215546Sopenharmony_ci 9777bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 9778bf215546Sopenharmony_ciradv_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, 9779bf215546Sopenharmony_ci const VkDependencyInfo* pDependencyInfo) 9780bf215546Sopenharmony_ci{ 9781bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 9782bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_event, event, _event); 9783bf215546Sopenharmony_ci VkPipelineStageFlags2 src_stage_mask = 0; 9784bf215546Sopenharmony_ci 9785bf215546Sopenharmony_ci for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++) 9786bf215546Sopenharmony_ci src_stage_mask |= pDependencyInfo->pMemoryBarriers[i].srcStageMask; 9787bf215546Sopenharmony_ci for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++) 9788bf215546Sopenharmony_ci src_stage_mask |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask; 9789bf215546Sopenharmony_ci for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++) 9790bf215546Sopenharmony_ci src_stage_mask |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask; 9791bf215546Sopenharmony_ci 9792bf215546Sopenharmony_ci write_event(cmd_buffer, event, src_stage_mask, 1); 9793bf215546Sopenharmony_ci} 9794bf215546Sopenharmony_ci 9795bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 
9796bf215546Sopenharmony_ciradv_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event, 9797bf215546Sopenharmony_ci VkPipelineStageFlags2 stageMask) 9798bf215546Sopenharmony_ci{ 9799bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 9800bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_event, event, _event); 9801bf215546Sopenharmony_ci 9802bf215546Sopenharmony_ci write_event(cmd_buffer, event, stageMask, 0); 9803bf215546Sopenharmony_ci} 9804bf215546Sopenharmony_ci 9805bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 9806bf215546Sopenharmony_ciradv_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents, 9807bf215546Sopenharmony_ci const VkDependencyInfo* pDependencyInfos) 9808bf215546Sopenharmony_ci{ 9809bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 9810bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 9811bf215546Sopenharmony_ci 9812bf215546Sopenharmony_ci for (unsigned i = 0; i < eventCount; ++i) { 9813bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_event, event, pEvents[i]); 9814bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(event->bo); 9815bf215546Sopenharmony_ci 9816bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo); 9817bf215546Sopenharmony_ci 9818bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7); 9819bf215546Sopenharmony_ci 9820bf215546Sopenharmony_ci radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff); 9821bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 9822bf215546Sopenharmony_ci } 9823bf215546Sopenharmony_ci 9824bf215546Sopenharmony_ci radv_barrier(cmd_buffer, pDependencyInfos, RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS); 9825bf215546Sopenharmony_ci} 9826bf215546Sopenharmony_ci 9827bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 9828bf215546Sopenharmony_ciradv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t 
deviceMask) 9829bf215546Sopenharmony_ci{ 9830bf215546Sopenharmony_ci /* No-op */ 9831bf215546Sopenharmony_ci} 9832bf215546Sopenharmony_ci 9833bf215546Sopenharmony_ci/* VK_EXT_conditional_rendering */ 9834bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 9835bf215546Sopenharmony_ciradv_CmdBeginConditionalRenderingEXT( 9836bf215546Sopenharmony_ci VkCommandBuffer commandBuffer, 9837bf215546Sopenharmony_ci const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin) 9838bf215546Sopenharmony_ci{ 9839bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 9840bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer); 9841bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 9842bf215546Sopenharmony_ci unsigned pred_op = PREDICATION_OP_BOOL32; 9843bf215546Sopenharmony_ci bool draw_visible = true; 9844bf215546Sopenharmony_ci uint64_t va; 9845bf215546Sopenharmony_ci 9846bf215546Sopenharmony_ci va = radv_buffer_get_va(buffer->bo) + buffer->offset + pConditionalRenderingBegin->offset; 9847bf215546Sopenharmony_ci 9848bf215546Sopenharmony_ci /* By default, if the 32-bit value at offset in buffer memory is zero, 9849bf215546Sopenharmony_ci * then the rendering commands are discarded, otherwise they are 9850bf215546Sopenharmony_ci * executed as normal. If the inverted flag is set, all commands are 9851bf215546Sopenharmony_ci * discarded if the value is non zero. 
9852bf215546Sopenharmony_ci */ 9853bf215546Sopenharmony_ci if (pConditionalRenderingBegin->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) { 9854bf215546Sopenharmony_ci draw_visible = false; 9855bf215546Sopenharmony_ci } 9856bf215546Sopenharmony_ci 9857bf215546Sopenharmony_ci si_emit_cache_flush(cmd_buffer); 9858bf215546Sopenharmony_ci 9859bf215546Sopenharmony_ci if (cmd_buffer->qf == RADV_QUEUE_GENERAL && 9860bf215546Sopenharmony_ci !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) { 9861bf215546Sopenharmony_ci uint64_t pred_value = 0, pred_va; 9862bf215546Sopenharmony_ci unsigned pred_offset; 9863bf215546Sopenharmony_ci 9864bf215546Sopenharmony_ci /* From the Vulkan spec 1.1.107: 9865bf215546Sopenharmony_ci * 9866bf215546Sopenharmony_ci * "If the 32-bit value at offset in buffer memory is zero, 9867bf215546Sopenharmony_ci * then the rendering commands are discarded, otherwise they 9868bf215546Sopenharmony_ci * are executed as normal. If the value of the predicate in 9869bf215546Sopenharmony_ci * buffer memory changes while conditional rendering is 9870bf215546Sopenharmony_ci * active, the rendering commands may be discarded in an 9871bf215546Sopenharmony_ci * implementation-dependent way. Some implementations may 9872bf215546Sopenharmony_ci * latch the value of the predicate upon beginning conditional 9873bf215546Sopenharmony_ci * rendering while others may read it before every rendering 9874bf215546Sopenharmony_ci * command." 9875bf215546Sopenharmony_ci * 9876bf215546Sopenharmony_ci * But, the AMD hardware treats the predicate as a 64-bit 9877bf215546Sopenharmony_ci * value which means we need a workaround in the driver. 9878bf215546Sopenharmony_ci * Luckily, it's not required to support if the value changes 9879bf215546Sopenharmony_ci * when predication is active. 
9880bf215546Sopenharmony_ci * 9881bf215546Sopenharmony_ci * The workaround is as follows: 9882bf215546Sopenharmony_ci * 1) allocate a 64-value in the upload BO and initialize it 9883bf215546Sopenharmony_ci * to 0 9884bf215546Sopenharmony_ci * 2) copy the 32-bit predicate value to the upload BO 9885bf215546Sopenharmony_ci * 3) use the new allocated VA address for predication 9886bf215546Sopenharmony_ci * 9887bf215546Sopenharmony_ci * Based on the conditionalrender demo, it's faster to do the 9888bf215546Sopenharmony_ci * COPY_DATA in ME (+ sync PFP) instead of PFP. 9889bf215546Sopenharmony_ci */ 9890bf215546Sopenharmony_ci radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset); 9891bf215546Sopenharmony_ci 9892bf215546Sopenharmony_ci pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset; 9893bf215546Sopenharmony_ci 9894bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 9895bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 9896bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 9897bf215546Sopenharmony_ci radeon_emit(cs, va); 9898bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 9899bf215546Sopenharmony_ci radeon_emit(cs, pred_va); 9900bf215546Sopenharmony_ci radeon_emit(cs, pred_va >> 32); 9901bf215546Sopenharmony_ci 9902bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0)); 9903bf215546Sopenharmony_ci radeon_emit(cs, 0); 9904bf215546Sopenharmony_ci 9905bf215546Sopenharmony_ci va = pred_va; 9906bf215546Sopenharmony_ci pred_op = PREDICATION_OP_BOOL64; 9907bf215546Sopenharmony_ci } 9908bf215546Sopenharmony_ci 9909bf215546Sopenharmony_ci /* MEC doesn't support predication, we emulate it elsewhere. 
*/ 9910bf215546Sopenharmony_ci if (!radv_cmd_buffer_uses_mec(cmd_buffer)) { 9911bf215546Sopenharmony_ci si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va); 9912bf215546Sopenharmony_ci } 9913bf215546Sopenharmony_ci 9914bf215546Sopenharmony_ci /* Store conditional rendering user info. */ 9915bf215546Sopenharmony_ci cmd_buffer->state.predicating = true; 9916bf215546Sopenharmony_ci cmd_buffer->state.predication_type = draw_visible; 9917bf215546Sopenharmony_ci cmd_buffer->state.predication_op = pred_op; 9918bf215546Sopenharmony_ci cmd_buffer->state.predication_va = va; 9919bf215546Sopenharmony_ci cmd_buffer->mec_inv_pred_emitted = false; 9920bf215546Sopenharmony_ci} 9921bf215546Sopenharmony_ci 9922bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 9923bf215546Sopenharmony_ciradv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer) 9924bf215546Sopenharmony_ci{ 9925bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 9926bf215546Sopenharmony_ci 9927bf215546Sopenharmony_ci /* MEC doesn't support predication, no need to emit anything here. */ 9928bf215546Sopenharmony_ci if (!radv_cmd_buffer_uses_mec(cmd_buffer)) { 9929bf215546Sopenharmony_ci si_emit_set_predication_state(cmd_buffer, false, 0, 0); 9930bf215546Sopenharmony_ci } 9931bf215546Sopenharmony_ci 9932bf215546Sopenharmony_ci /* Reset conditional rendering user info. 
*/ 9933bf215546Sopenharmony_ci cmd_buffer->state.predicating = false; 9934bf215546Sopenharmony_ci cmd_buffer->state.predication_type = -1; 9935bf215546Sopenharmony_ci cmd_buffer->state.predication_op = 0; 9936bf215546Sopenharmony_ci cmd_buffer->state.predication_va = 0; 9937bf215546Sopenharmony_ci cmd_buffer->mec_inv_pred_emitted = false; 9938bf215546Sopenharmony_ci} 9939bf215546Sopenharmony_ci 9940bf215546Sopenharmony_ci/* VK_EXT_transform_feedback */ 9941bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 9942bf215546Sopenharmony_ciradv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t firstBinding, 9943bf215546Sopenharmony_ci uint32_t bindingCount, const VkBuffer *pBuffers, 9944bf215546Sopenharmony_ci const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes) 9945bf215546Sopenharmony_ci{ 9946bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 9947bf215546Sopenharmony_ci struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; 9948bf215546Sopenharmony_ci uint8_t enabled_mask = 0; 9949bf215546Sopenharmony_ci 9950bf215546Sopenharmony_ci assert(firstBinding + bindingCount <= MAX_SO_BUFFERS); 9951bf215546Sopenharmony_ci for (uint32_t i = 0; i < bindingCount; i++) { 9952bf215546Sopenharmony_ci uint32_t idx = firstBinding + i; 9953bf215546Sopenharmony_ci 9954bf215546Sopenharmony_ci sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]); 9955bf215546Sopenharmony_ci sb[idx].offset = pOffsets[i]; 9956bf215546Sopenharmony_ci 9957bf215546Sopenharmony_ci if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) { 9958bf215546Sopenharmony_ci sb[idx].size = sb[idx].buffer->vk.size - sb[idx].offset; 9959bf215546Sopenharmony_ci } else { 9960bf215546Sopenharmony_ci sb[idx].size = pSizes[i]; 9961bf215546Sopenharmony_ci } 9962bf215546Sopenharmony_ci 9963bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, sb[idx].buffer->bo); 9964bf215546Sopenharmony_ci 9965bf215546Sopenharmony_ci enabled_mask |= 1 << 
idx; 9966bf215546Sopenharmony_ci } 9967bf215546Sopenharmony_ci 9968bf215546Sopenharmony_ci cmd_buffer->state.streamout.enabled_mask |= enabled_mask; 9969bf215546Sopenharmony_ci 9970bf215546Sopenharmony_ci cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER; 9971bf215546Sopenharmony_ci} 9972bf215546Sopenharmony_ci 9973bf215546Sopenharmony_cibool 9974bf215546Sopenharmony_ciradv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer) 9975bf215546Sopenharmony_ci{ 9976bf215546Sopenharmony_ci struct radv_streamout_state *so = &cmd_buffer->state.streamout; 9977bf215546Sopenharmony_ci 9978bf215546Sopenharmony_ci /* Streamout must be enabled for the PRIMITIVES_GENERATED query to work. */ 9979bf215546Sopenharmony_ci return (so->streamout_enabled || cmd_buffer->state.prims_gen_query_enabled) && 9980bf215546Sopenharmony_ci !cmd_buffer->state.suspend_streamout; 9981bf215546Sopenharmony_ci} 9982bf215546Sopenharmony_ci 9983bf215546Sopenharmony_civoid 9984bf215546Sopenharmony_ciradv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer) 9985bf215546Sopenharmony_ci{ 9986bf215546Sopenharmony_ci struct radv_streamout_state *so = &cmd_buffer->state.streamout; 9987bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 9988bf215546Sopenharmony_ci bool streamout_enabled = radv_is_streamout_enabled(cmd_buffer); 9989bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 9990bf215546Sopenharmony_ci uint32_t enabled_stream_buffers_mask = 0; 9991bf215546Sopenharmony_ci 9992bf215546Sopenharmony_ci if (pipeline && pipeline->streamout_shader) { 9993bf215546Sopenharmony_ci enabled_stream_buffers_mask = pipeline->streamout_shader->info.so.enabled_stream_buffers_mask; 9994bf215546Sopenharmony_ci } 9995bf215546Sopenharmony_ci 9996bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2); 9997bf215546Sopenharmony_ci radeon_emit(cs, S_028B94_STREAMOUT_0_EN(streamout_enabled) | S_028B94_RAST_STREAM(0) | 
9998bf215546Sopenharmony_ci S_028B94_STREAMOUT_1_EN(streamout_enabled) | 9999bf215546Sopenharmony_ci S_028B94_STREAMOUT_2_EN(streamout_enabled) | 10000bf215546Sopenharmony_ci S_028B94_STREAMOUT_3_EN(streamout_enabled)); 10001bf215546Sopenharmony_ci radeon_emit(cs, so->hw_enabled_mask & enabled_stream_buffers_mask); 10002bf215546Sopenharmony_ci 10003bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 10004bf215546Sopenharmony_ci} 10005bf215546Sopenharmony_ci 10006bf215546Sopenharmony_cistatic void 10007bf215546Sopenharmony_ciradv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable) 10008bf215546Sopenharmony_ci{ 10009bf215546Sopenharmony_ci struct radv_streamout_state *so = &cmd_buffer->state.streamout; 10010bf215546Sopenharmony_ci bool old_streamout_enabled = radv_is_streamout_enabled(cmd_buffer); 10011bf215546Sopenharmony_ci uint32_t old_hw_enabled_mask = so->hw_enabled_mask; 10012bf215546Sopenharmony_ci 10013bf215546Sopenharmony_ci so->streamout_enabled = enable; 10014bf215546Sopenharmony_ci 10015bf215546Sopenharmony_ci so->hw_enabled_mask = so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) | 10016bf215546Sopenharmony_ci (so->enabled_mask << 12); 10017bf215546Sopenharmony_ci 10018bf215546Sopenharmony_ci if (!cmd_buffer->device->physical_device->use_ngg_streamout && 10019bf215546Sopenharmony_ci ((old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) || 10020bf215546Sopenharmony_ci (old_hw_enabled_mask != so->hw_enabled_mask))) 10021bf215546Sopenharmony_ci radv_emit_streamout_enable(cmd_buffer); 10022bf215546Sopenharmony_ci 10023bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->use_ngg_streamout) { 10024bf215546Sopenharmony_ci cmd_buffer->gds_needed = true; 10025bf215546Sopenharmony_ci cmd_buffer->gds_oa_needed = true; 10026bf215546Sopenharmony_ci } 10027bf215546Sopenharmony_ci} 10028bf215546Sopenharmony_ci 10029bf215546Sopenharmony_cistatic void 
10030bf215546Sopenharmony_ciradv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer) 10031bf215546Sopenharmony_ci{ 10032bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 10033bf215546Sopenharmony_ci unsigned reg_strmout_cntl; 10034bf215546Sopenharmony_ci 10035bf215546Sopenharmony_ci /* The register is at different places on different ASICs. */ 10036bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) { 10037bf215546Sopenharmony_ci reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; 10038bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0)); 10039bf215546Sopenharmony_ci radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_ENGINE_SEL(V_370_ME)); 10040bf215546Sopenharmony_ci radeon_emit(cs, R_0300FC_CP_STRMOUT_CNTL >> 2); 10041bf215546Sopenharmony_ci radeon_emit(cs, 0); 10042bf215546Sopenharmony_ci radeon_emit(cs, 0); 10043bf215546Sopenharmony_ci } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) { 10044bf215546Sopenharmony_ci reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL; 10045bf215546Sopenharmony_ci radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0); 10046bf215546Sopenharmony_ci } else { 10047bf215546Sopenharmony_ci reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL; 10048bf215546Sopenharmony_ci radeon_set_config_reg(cs, reg_strmout_cntl, 0); 10049bf215546Sopenharmony_ci } 10050bf215546Sopenharmony_ci 10051bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0)); 10052bf215546Sopenharmony_ci radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0)); 10053bf215546Sopenharmony_ci 10054bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); 10055bf215546Sopenharmony_ci radeon_emit(cs, 10056bf215546Sopenharmony_ci WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */ 10057bf215546Sopenharmony_ci radeon_emit(cs, reg_strmout_cntl >> 2); /* register */ 10058bf215546Sopenharmony_ci radeon_emit(cs, 0); 
10059bf215546Sopenharmony_ci radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */ 10060bf215546Sopenharmony_ci radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */ 10061bf215546Sopenharmony_ci radeon_emit(cs, 4); /* poll interval */ 10062bf215546Sopenharmony_ci} 10063bf215546Sopenharmony_ci 10064bf215546Sopenharmony_cistatic void 10065bf215546Sopenharmony_ciradv_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer, 10066bf215546Sopenharmony_ci uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, 10067bf215546Sopenharmony_ci const VkDeviceSize *pCounterBufferOffsets) 10068bf215546Sopenharmony_ci 10069bf215546Sopenharmony_ci{ 10070bf215546Sopenharmony_ci struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings; 10071bf215546Sopenharmony_ci struct radv_streamout_state *so = &cmd_buffer->state.streamout; 10072bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline; 10073bf215546Sopenharmony_ci struct radv_shader_info *info = &pipeline->streamout_shader->info; 10074bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 10075bf215546Sopenharmony_ci 10076bf215546Sopenharmony_ci radv_flush_vgt_streamout(cmd_buffer); 10077bf215546Sopenharmony_ci 10078bf215546Sopenharmony_ci assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); 10079bf215546Sopenharmony_ci u_foreach_bit(i, so->enabled_mask) 10080bf215546Sopenharmony_ci { 10081bf215546Sopenharmony_ci int32_t counter_buffer_idx = i - firstCounterBuffer; 10082bf215546Sopenharmony_ci if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) 10083bf215546Sopenharmony_ci counter_buffer_idx = -1; 10084bf215546Sopenharmony_ci 10085bf215546Sopenharmony_ci /* AMD GCN binds streamout buffers as shader resources. 10086bf215546Sopenharmony_ci * VGT only counts primitives and tells the shader through 10087bf215546Sopenharmony_ci * SGPRs what to do. 
10088bf215546Sopenharmony_ci */ 10089bf215546Sopenharmony_ci radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 2); 10090bf215546Sopenharmony_ci radeon_emit(cs, sb[i].size >> 2); /* BUFFER_SIZE (in DW) */ 10091bf215546Sopenharmony_ci radeon_emit(cs, info->so.strides[i]); /* VTX_STRIDE (in DW) */ 10092bf215546Sopenharmony_ci 10093bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 10094bf215546Sopenharmony_ci 10095bf215546Sopenharmony_ci if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { 10096bf215546Sopenharmony_ci /* The array of counter buffers is optional. */ 10097bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); 10098bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(buffer->bo); 10099bf215546Sopenharmony_ci uint64_t counter_buffer_offset = 0; 10100bf215546Sopenharmony_ci 10101bf215546Sopenharmony_ci if (pCounterBufferOffsets) 10102bf215546Sopenharmony_ci counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; 10103bf215546Sopenharmony_ci 10104bf215546Sopenharmony_ci va += buffer->offset + counter_buffer_offset; 10105bf215546Sopenharmony_ci 10106bf215546Sopenharmony_ci /* Append */ 10107bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); 10108bf215546Sopenharmony_ci radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ 10109bf215546Sopenharmony_ci STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */ 10110bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 10111bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 10112bf215546Sopenharmony_ci radeon_emit(cs, va); /* src address lo */ 10113bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); /* src address hi */ 10114bf215546Sopenharmony_ci 10115bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); 10116bf215546Sopenharmony_ci } else { 
10117bf215546Sopenharmony_ci /* Start from the beginning. */ 10118bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); 10119bf215546Sopenharmony_ci radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ 10120bf215546Sopenharmony_ci STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */ 10121bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 10122bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 10123bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 10124bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 10125bf215546Sopenharmony_ci } 10126bf215546Sopenharmony_ci } 10127bf215546Sopenharmony_ci 10128bf215546Sopenharmony_ci radv_set_streamout_enable(cmd_buffer, true); 10129bf215546Sopenharmony_ci} 10130bf215546Sopenharmony_ci 10131bf215546Sopenharmony_cistatic void 10132bf215546Sopenharmony_cigfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer, 10133bf215546Sopenharmony_ci uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, 10134bf215546Sopenharmony_ci const VkDeviceSize *pCounterBufferOffsets) 10135bf215546Sopenharmony_ci{ 10136bf215546Sopenharmony_ci struct radv_streamout_state *so = &cmd_buffer->state.streamout; 10137bf215546Sopenharmony_ci unsigned last_target = util_last_bit(so->enabled_mask) - 1; 10138bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 10139bf215546Sopenharmony_ci 10140bf215546Sopenharmony_ci assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10); 10141bf215546Sopenharmony_ci assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); 10142bf215546Sopenharmony_ci 10143bf215546Sopenharmony_ci /* Sync because the next streamout operation will overwrite GDS and we 10144bf215546Sopenharmony_ci * have to make sure it's idle. 10145bf215546Sopenharmony_ci * TODO: Improve by tracking if there is a streamout operation in 10146bf215546Sopenharmony_ci * flight. 
10147bf215546Sopenharmony_ci */ 10148bf215546Sopenharmony_ci cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH; 10149bf215546Sopenharmony_ci si_emit_cache_flush(cmd_buffer); 10150bf215546Sopenharmony_ci 10151bf215546Sopenharmony_ci u_foreach_bit(i, so->enabled_mask) 10152bf215546Sopenharmony_ci { 10153bf215546Sopenharmony_ci int32_t counter_buffer_idx = i - firstCounterBuffer; 10154bf215546Sopenharmony_ci if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) 10155bf215546Sopenharmony_ci counter_buffer_idx = -1; 10156bf215546Sopenharmony_ci 10157bf215546Sopenharmony_ci bool append = 10158bf215546Sopenharmony_ci counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]; 10159bf215546Sopenharmony_ci uint64_t va = 0; 10160bf215546Sopenharmony_ci 10161bf215546Sopenharmony_ci if (append) { 10162bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); 10163bf215546Sopenharmony_ci uint64_t counter_buffer_offset = 0; 10164bf215546Sopenharmony_ci 10165bf215546Sopenharmony_ci if (pCounterBufferOffsets) 10166bf215546Sopenharmony_ci counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; 10167bf215546Sopenharmony_ci 10168bf215546Sopenharmony_ci va += radv_buffer_get_va(buffer->bo); 10169bf215546Sopenharmony_ci va += buffer->offset + counter_buffer_offset; 10170bf215546Sopenharmony_ci 10171bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); 10172bf215546Sopenharmony_ci } 10173bf215546Sopenharmony_ci 10174bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0)); 10175bf215546Sopenharmony_ci radeon_emit(cs, S_411_SRC_SEL(append ? 
V_411_SRC_ADDR_TC_L2 : V_411_DATA) | 10176bf215546Sopenharmony_ci S_411_DST_SEL(V_411_GDS) | S_411_CP_SYNC(i == last_target)); 10177bf215546Sopenharmony_ci radeon_emit(cs, va); 10178bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 10179bf215546Sopenharmony_ci radeon_emit(cs, 4 * i); /* destination in GDS */ 10180bf215546Sopenharmony_ci radeon_emit(cs, 0); 10181bf215546Sopenharmony_ci radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target)); 10182bf215546Sopenharmony_ci } 10183bf215546Sopenharmony_ci 10184bf215546Sopenharmony_ci radv_set_streamout_enable(cmd_buffer, true); 10185bf215546Sopenharmony_ci} 10186bf215546Sopenharmony_ci 10187bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 10188bf215546Sopenharmony_ciradv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, 10189bf215546Sopenharmony_ci uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, 10190bf215546Sopenharmony_ci const VkDeviceSize *pCounterBufferOffsets) 10191bf215546Sopenharmony_ci{ 10192bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 10193bf215546Sopenharmony_ci 10194bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->use_ngg_streamout) { 10195bf215546Sopenharmony_ci gfx10_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount, 10196bf215546Sopenharmony_ci pCounterBuffers, pCounterBufferOffsets); 10197bf215546Sopenharmony_ci } else { 10198bf215546Sopenharmony_ci radv_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers, 10199bf215546Sopenharmony_ci pCounterBufferOffsets); 10200bf215546Sopenharmony_ci } 10201bf215546Sopenharmony_ci} 10202bf215546Sopenharmony_ci 10203bf215546Sopenharmony_cistatic void 10204bf215546Sopenharmony_ciradv_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer, 10205bf215546Sopenharmony_ci uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, 
10206bf215546Sopenharmony_ci const VkDeviceSize *pCounterBufferOffsets) 10207bf215546Sopenharmony_ci{ 10208bf215546Sopenharmony_ci struct radv_streamout_state *so = &cmd_buffer->state.streamout; 10209bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 10210bf215546Sopenharmony_ci 10211bf215546Sopenharmony_ci radv_flush_vgt_streamout(cmd_buffer); 10212bf215546Sopenharmony_ci 10213bf215546Sopenharmony_ci assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); 10214bf215546Sopenharmony_ci u_foreach_bit(i, so->enabled_mask) 10215bf215546Sopenharmony_ci { 10216bf215546Sopenharmony_ci int32_t counter_buffer_idx = i - firstCounterBuffer; 10217bf215546Sopenharmony_ci if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) 10218bf215546Sopenharmony_ci counter_buffer_idx = -1; 10219bf215546Sopenharmony_ci 10220bf215546Sopenharmony_ci if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { 10221bf215546Sopenharmony_ci /* The array of counters buffer is optional. 
*/ 10222bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); 10223bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(buffer->bo); 10224bf215546Sopenharmony_ci uint64_t counter_buffer_offset = 0; 10225bf215546Sopenharmony_ci 10226bf215546Sopenharmony_ci if (pCounterBufferOffsets) 10227bf215546Sopenharmony_ci counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; 10228bf215546Sopenharmony_ci 10229bf215546Sopenharmony_ci va += buffer->offset + counter_buffer_offset; 10230bf215546Sopenharmony_ci 10231bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0)); 10232bf215546Sopenharmony_ci radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */ 10233bf215546Sopenharmony_ci STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) | 10234bf215546Sopenharmony_ci STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */ 10235bf215546Sopenharmony_ci radeon_emit(cs, va); /* dst address lo */ 10236bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); /* dst address hi */ 10237bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 10238bf215546Sopenharmony_ci radeon_emit(cs, 0); /* unused */ 10239bf215546Sopenharmony_ci 10240bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); 10241bf215546Sopenharmony_ci } 10242bf215546Sopenharmony_ci 10243bf215546Sopenharmony_ci /* Deactivate transform feedback by zeroing the buffer size. 10244bf215546Sopenharmony_ci * The counters (primitives generated, primitives emitted) may 10245bf215546Sopenharmony_ci * be enabled even if there is not buffer bound. This ensures 10246bf215546Sopenharmony_ci * that the primitives-emitted query won't increment. 
10247bf215546Sopenharmony_ci */ 10248bf215546Sopenharmony_ci radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0); 10249bf215546Sopenharmony_ci 10250bf215546Sopenharmony_ci cmd_buffer->state.context_roll_without_scissor_emitted = true; 10251bf215546Sopenharmony_ci } 10252bf215546Sopenharmony_ci 10253bf215546Sopenharmony_ci radv_set_streamout_enable(cmd_buffer, false); 10254bf215546Sopenharmony_ci} 10255bf215546Sopenharmony_ci 10256bf215546Sopenharmony_cistatic void 10257bf215546Sopenharmony_cigfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer, 10258bf215546Sopenharmony_ci uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, 10259bf215546Sopenharmony_ci const VkDeviceSize *pCounterBufferOffsets) 10260bf215546Sopenharmony_ci{ 10261bf215546Sopenharmony_ci struct radv_streamout_state *so = &cmd_buffer->state.streamout; 10262bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 10263bf215546Sopenharmony_ci 10264bf215546Sopenharmony_ci assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10); 10265bf215546Sopenharmony_ci assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS); 10266bf215546Sopenharmony_ci 10267bf215546Sopenharmony_ci u_foreach_bit(i, so->enabled_mask) 10268bf215546Sopenharmony_ci { 10269bf215546Sopenharmony_ci int32_t counter_buffer_idx = i - firstCounterBuffer; 10270bf215546Sopenharmony_ci if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount) 10271bf215546Sopenharmony_ci counter_buffer_idx = -1; 10272bf215546Sopenharmony_ci 10273bf215546Sopenharmony_ci if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) { 10274bf215546Sopenharmony_ci /* The array of counters buffer is optional. 
*/ 10275bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]); 10276bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(buffer->bo); 10277bf215546Sopenharmony_ci uint64_t counter_buffer_offset = 0; 10278bf215546Sopenharmony_ci 10279bf215546Sopenharmony_ci if (pCounterBufferOffsets) 10280bf215546Sopenharmony_ci counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx]; 10281bf215546Sopenharmony_ci 10282bf215546Sopenharmony_ci va += buffer->offset + counter_buffer_offset; 10283bf215546Sopenharmony_ci 10284bf215546Sopenharmony_ci si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, 10285bf215546Sopenharmony_ci radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0, 10286bf215546Sopenharmony_ci EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0); 10287bf215546Sopenharmony_ci 10288bf215546Sopenharmony_ci radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo); 10289bf215546Sopenharmony_ci } 10290bf215546Sopenharmony_ci } 10291bf215546Sopenharmony_ci 10292bf215546Sopenharmony_ci radv_set_streamout_enable(cmd_buffer, false); 10293bf215546Sopenharmony_ci} 10294bf215546Sopenharmony_ci 10295bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 10296bf215546Sopenharmony_ciradv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer, 10297bf215546Sopenharmony_ci uint32_t counterBufferCount, const VkBuffer *pCounterBuffers, 10298bf215546Sopenharmony_ci const VkDeviceSize *pCounterBufferOffsets) 10299bf215546Sopenharmony_ci{ 10300bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 10301bf215546Sopenharmony_ci 10302bf215546Sopenharmony_ci if (cmd_buffer->device->physical_device->use_ngg_streamout) { 10303bf215546Sopenharmony_ci gfx10_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers, 10304bf215546Sopenharmony_ci pCounterBufferOffsets); 10305bf215546Sopenharmony_ci } else { 
10306bf215546Sopenharmony_ci radv_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers, 10307bf215546Sopenharmony_ci pCounterBufferOffsets); 10308bf215546Sopenharmony_ci } 10309bf215546Sopenharmony_ci} 10310bf215546Sopenharmony_ci 10311bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 10312bf215546Sopenharmony_ciradv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount, 10313bf215546Sopenharmony_ci uint32_t firstInstance, VkBuffer _counterBuffer, 10314bf215546Sopenharmony_ci VkDeviceSize counterBufferOffset, uint32_t counterOffset, 10315bf215546Sopenharmony_ci uint32_t vertexStride) 10316bf215546Sopenharmony_ci{ 10317bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 10318bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer); 10319bf215546Sopenharmony_ci struct radv_draw_info info; 10320bf215546Sopenharmony_ci 10321bf215546Sopenharmony_ci info.count = 0; 10322bf215546Sopenharmony_ci info.instance_count = instanceCount; 10323bf215546Sopenharmony_ci info.first_instance = firstInstance; 10324bf215546Sopenharmony_ci info.strmout_buffer = counterBuffer; 10325bf215546Sopenharmony_ci info.strmout_buffer_offset = counterBufferOffset; 10326bf215546Sopenharmony_ci info.stride = vertexStride; 10327bf215546Sopenharmony_ci info.indexed = false; 10328bf215546Sopenharmony_ci info.indirect = NULL; 10329bf215546Sopenharmony_ci 10330bf215546Sopenharmony_ci if (!radv_before_draw(cmd_buffer, &info, 1)) 10331bf215546Sopenharmony_ci return; 10332bf215546Sopenharmony_ci struct VkMultiDrawInfoEXT minfo = { 0, 0 }; 10333bf215546Sopenharmony_ci radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, S_0287F0_USE_OPAQUE(1), 0); 10334bf215546Sopenharmony_ci radv_after_draw(cmd_buffer); 10335bf215546Sopenharmony_ci} 10336bf215546Sopenharmony_ci 10337bf215546Sopenharmony_ci/* VK_AMD_buffer_marker */ 10338bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 
10339bf215546Sopenharmony_ciradv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage, 10340bf215546Sopenharmony_ci VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker) 10341bf215546Sopenharmony_ci{ 10342bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer); 10343bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer); 10344bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = cmd_buffer->cs; 10345bf215546Sopenharmony_ci uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + dstOffset; 10346bf215546Sopenharmony_ci 10347bf215546Sopenharmony_ci si_emit_cache_flush(cmd_buffer); 10348bf215546Sopenharmony_ci 10349bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12); 10350bf215546Sopenharmony_ci 10351bf215546Sopenharmony_ci if (!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) { 10352bf215546Sopenharmony_ci radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0)); 10353bf215546Sopenharmony_ci radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) | 10354bf215546Sopenharmony_ci COPY_DATA_WR_CONFIRM); 10355bf215546Sopenharmony_ci radeon_emit(cs, marker); 10356bf215546Sopenharmony_ci radeon_emit(cs, 0); 10357bf215546Sopenharmony_ci radeon_emit(cs, va); 10358bf215546Sopenharmony_ci radeon_emit(cs, va >> 32); 10359bf215546Sopenharmony_ci } else { 10360bf215546Sopenharmony_ci si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level, 10361bf215546Sopenharmony_ci radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 10362bf215546Sopenharmony_ci 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker, 10363bf215546Sopenharmony_ci cmd_buffer->gfx9_eop_bug_va); 10364bf215546Sopenharmony_ci } 10365bf215546Sopenharmony_ci 10366bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 10367bf215546Sopenharmony_ci} 10368bf215546Sopenharmony_ci 
10369bf215546Sopenharmony_civoid 10370bf215546Sopenharmony_ciradv_CmdBindPipelineShaderGroupNV(VkCommandBuffer commandBuffer, 10371bf215546Sopenharmony_ci VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline, 10372bf215546Sopenharmony_ci uint32_t groupIndex) 10373bf215546Sopenharmony_ci{ 10374bf215546Sopenharmony_ci fprintf(stderr, "radv: unimplemented vkCmdBindPipelineShaderGroupNV\n"); 10375bf215546Sopenharmony_ci abort(); 10376bf215546Sopenharmony_ci}