1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2016 Red Hat. 3bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * based in part on anv driver which is: 6bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 7bf215546Sopenharmony_ci * 8bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 9bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 10bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 11bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 13bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 16bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 17bf215546Sopenharmony_ci * Software. 18bf215546Sopenharmony_ci * 19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25bf215546Sopenharmony_ci * IN THE SOFTWARE. 
26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#include "nir/nir.h" 29bf215546Sopenharmony_ci#include "nir/nir_builder.h" 30bf215546Sopenharmony_ci#include "spirv/nir_spirv.h" 31bf215546Sopenharmony_ci#include "util/disk_cache.h" 32bf215546Sopenharmony_ci#include "util/mesa-sha1.h" 33bf215546Sopenharmony_ci#include "util/os_time.h" 34bf215546Sopenharmony_ci#include "util/u_atomic.h" 35bf215546Sopenharmony_ci#include "radv_cs.h" 36bf215546Sopenharmony_ci#include "radv_debug.h" 37bf215546Sopenharmony_ci#include "radv_meta.h" 38bf215546Sopenharmony_ci#include "radv_private.h" 39bf215546Sopenharmony_ci#include "radv_shader.h" 40bf215546Sopenharmony_ci#include "radv_shader_args.h" 41bf215546Sopenharmony_ci#include "vk_pipeline.h" 42bf215546Sopenharmony_ci#include "vk_util.h" 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci#include "util/debug.h" 45bf215546Sopenharmony_ci#include "ac_binary.h" 46bf215546Sopenharmony_ci#include "ac_nir.h" 47bf215546Sopenharmony_ci#include "ac_shader_util.h" 48bf215546Sopenharmony_ci#include "aco_interface.h" 49bf215546Sopenharmony_ci#include "sid.h" 50bf215546Sopenharmony_ci#include "vk_format.h" 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_cistruct radv_blend_state { 53bf215546Sopenharmony_ci uint32_t blend_enable_4bit; 54bf215546Sopenharmony_ci uint32_t need_src_alpha; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci uint32_t cb_target_mask; 57bf215546Sopenharmony_ci uint32_t cb_target_enabled_4bit; 58bf215546Sopenharmony_ci uint32_t sx_mrt_blend_opt[8]; 59bf215546Sopenharmony_ci uint32_t cb_blend_control[8]; 60bf215546Sopenharmony_ci 61bf215546Sopenharmony_ci uint32_t spi_shader_col_format; 62bf215546Sopenharmony_ci uint32_t col_format_is_int8; 63bf215546Sopenharmony_ci uint32_t col_format_is_int10; 64bf215546Sopenharmony_ci uint32_t col_format_is_float32; 65bf215546Sopenharmony_ci uint32_t cb_shader_mask; 66bf215546Sopenharmony_ci uint32_t db_alpha_to_mask; 67bf215546Sopenharmony_ci 
68bf215546Sopenharmony_ci uint32_t commutative_4bit; 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_ci bool mrt0_is_dual_src; 71bf215546Sopenharmony_ci}; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_cistruct radv_depth_stencil_state { 74bf215546Sopenharmony_ci uint32_t db_render_control; 75bf215546Sopenharmony_ci uint32_t db_render_override; 76bf215546Sopenharmony_ci uint32_t db_render_override2; 77bf215546Sopenharmony_ci}; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_cistruct radv_dsa_order_invariance { 80bf215546Sopenharmony_ci /* Whether the final result in Z/S buffers is guaranteed to be 81bf215546Sopenharmony_ci * invariant under changes to the order in which fragments arrive. 82bf215546Sopenharmony_ci */ 83bf215546Sopenharmony_ci bool zs; 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci /* Whether the set of fragments that pass the combined Z/S test is 86bf215546Sopenharmony_ci * guaranteed to be invariant under changes to the order in which 87bf215546Sopenharmony_ci * fragments arrive. 
88bf215546Sopenharmony_ci */ 89bf215546Sopenharmony_ci bool pass_set; 90bf215546Sopenharmony_ci}; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_cistatic bool 93bf215546Sopenharmony_ciradv_is_raster_enabled(const struct radv_graphics_pipeline *pipeline, 94bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci return !pCreateInfo->pRasterizationState->rasterizerDiscardEnable || 97bf215546Sopenharmony_ci (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE); 98bf215546Sopenharmony_ci} 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_cistatic bool 101bf215546Sopenharmony_ciradv_is_static_vrs_enabled(const struct radv_graphics_pipeline *pipeline, 102bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 103bf215546Sopenharmony_ci{ 104bf215546Sopenharmony_ci return info->fsr.size.width != 1 || info->fsr.size.height != 1 || 105bf215546Sopenharmony_ci info->fsr.combiner_ops[0] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR || 106bf215546Sopenharmony_ci info->fsr.combiner_ops[1] != VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; 107bf215546Sopenharmony_ci} 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_cistatic bool 110bf215546Sopenharmony_ciradv_is_vrs_enabled(const struct radv_graphics_pipeline *pipeline, 111bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 112bf215546Sopenharmony_ci{ 113bf215546Sopenharmony_ci return radv_is_static_vrs_enabled(pipeline, info) || 114bf215546Sopenharmony_ci (pipeline->dynamic_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE); 115bf215546Sopenharmony_ci} 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_cistatic bool 118bf215546Sopenharmony_ciradv_pipeline_has_ds_attachments(const struct radv_rendering_info *ri_info) 119bf215546Sopenharmony_ci{ 120bf215546Sopenharmony_ci return ri_info->depth_att_format != VK_FORMAT_UNDEFINED || 121bf215546Sopenharmony_ci ri_info->stencil_att_format != VK_FORMAT_UNDEFINED; 
122bf215546Sopenharmony_ci} 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_cistatic bool 125bf215546Sopenharmony_ciradv_pipeline_has_color_attachments(const struct radv_rendering_info *ri_info) 126bf215546Sopenharmony_ci{ 127bf215546Sopenharmony_ci for (uint32_t i = 0; i < ri_info->color_att_count; ++i) { 128bf215546Sopenharmony_ci if (ri_info->color_att_formats[i] != VK_FORMAT_UNDEFINED) 129bf215546Sopenharmony_ci return true; 130bf215546Sopenharmony_ci } 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci return false; 133bf215546Sopenharmony_ci} 134bf215546Sopenharmony_ci 135bf215546Sopenharmony_cistatic bool 136bf215546Sopenharmony_ciradv_pipeline_has_ngg(const struct radv_graphics_pipeline *pipeline) 137bf215546Sopenharmony_ci{ 138bf215546Sopenharmony_ci struct radv_shader *shader = pipeline->base.shaders[pipeline->last_vgt_api_stage]; 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci return shader->info.is_ngg; 141bf215546Sopenharmony_ci} 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_cibool 144bf215546Sopenharmony_ciradv_pipeline_has_ngg_passthrough(const struct radv_graphics_pipeline *pipeline) 145bf215546Sopenharmony_ci{ 146bf215546Sopenharmony_ci assert(radv_pipeline_has_ngg(pipeline)); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci struct radv_shader *shader = pipeline->base.shaders[pipeline->last_vgt_api_stage]; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci return shader->info.is_ngg_passthrough; 151bf215546Sopenharmony_ci} 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_cibool 154bf215546Sopenharmony_ciradv_pipeline_has_gs_copy_shader(const struct radv_pipeline *pipeline) 155bf215546Sopenharmony_ci{ 156bf215546Sopenharmony_ci return !!pipeline->gs_copy_shader; 157bf215546Sopenharmony_ci} 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_cistatic struct radv_pipeline_slab * 160bf215546Sopenharmony_ciradv_pipeline_slab_create(struct radv_device *device, struct radv_pipeline *pipeline, 161bf215546Sopenharmony_ci uint32_t 
code_size) 162bf215546Sopenharmony_ci{ 163bf215546Sopenharmony_ci struct radv_pipeline_slab *slab; 164bf215546Sopenharmony_ci 165bf215546Sopenharmony_ci slab = calloc(1, sizeof(*slab)); 166bf215546Sopenharmony_ci if (!slab) 167bf215546Sopenharmony_ci return NULL; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci slab->ref_count = 1; 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci slab->alloc = radv_alloc_shader_memory(device, code_size, pipeline); 172bf215546Sopenharmony_ci if (!slab->alloc) { 173bf215546Sopenharmony_ci free(slab); 174bf215546Sopenharmony_ci return NULL; 175bf215546Sopenharmony_ci } 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci return slab; 178bf215546Sopenharmony_ci} 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_civoid 181bf215546Sopenharmony_ciradv_pipeline_slab_destroy(struct radv_device *device, struct radv_pipeline_slab *slab) 182bf215546Sopenharmony_ci{ 183bf215546Sopenharmony_ci if (!p_atomic_dec_zero(&slab->ref_count)) 184bf215546Sopenharmony_ci return; 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci radv_free_shader_memory(device, slab->alloc); 187bf215546Sopenharmony_ci free(slab); 188bf215546Sopenharmony_ci} 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_civoid 191bf215546Sopenharmony_ciradv_pipeline_destroy(struct radv_device *device, struct radv_pipeline *pipeline, 192bf215546Sopenharmony_ci const VkAllocationCallbacks *allocator) 193bf215546Sopenharmony_ci{ 194bf215546Sopenharmony_ci if (pipeline->type == RADV_PIPELINE_COMPUTE) { 195bf215546Sopenharmony_ci struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline); 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci free(compute_pipeline->rt_group_handles); 198bf215546Sopenharmony_ci free(compute_pipeline->rt_stack_sizes); 199bf215546Sopenharmony_ci } else if (pipeline->type == RADV_PIPELINE_LIBRARY) { 200bf215546Sopenharmony_ci struct radv_library_pipeline *library_pipeline = radv_pipeline_to_library(pipeline); 
201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci free(library_pipeline->groups); 203bf215546Sopenharmony_ci for (uint32_t i = 0; i < library_pipeline->stage_count; i++) { 204bf215546Sopenharmony_ci RADV_FROM_HANDLE(vk_shader_module, module, library_pipeline->stages[i].module); 205bf215546Sopenharmony_ci if (module) { 206bf215546Sopenharmony_ci vk_object_base_finish(&module->base); 207bf215546Sopenharmony_ci ralloc_free(module); 208bf215546Sopenharmony_ci } 209bf215546Sopenharmony_ci } 210bf215546Sopenharmony_ci free(library_pipeline->stages); 211bf215546Sopenharmony_ci free(library_pipeline->identifiers); 212bf215546Sopenharmony_ci free(library_pipeline->hashes); 213bf215546Sopenharmony_ci } 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci if (pipeline->slab) 216bf215546Sopenharmony_ci radv_pipeline_slab_destroy(device, pipeline->slab); 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) 219bf215546Sopenharmony_ci if (pipeline->shaders[i]) 220bf215546Sopenharmony_ci radv_shader_destroy(device, pipeline->shaders[i]); 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci if (pipeline->gs_copy_shader) 223bf215546Sopenharmony_ci radv_shader_destroy(device, pipeline->gs_copy_shader); 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci if (pipeline->cs.buf) 226bf215546Sopenharmony_ci free(pipeline->cs.buf); 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci vk_object_base_finish(&pipeline->base); 229bf215546Sopenharmony_ci vk_free2(&device->vk.alloc, allocator, pipeline); 230bf215546Sopenharmony_ci} 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 233bf215546Sopenharmony_ciradv_DestroyPipeline(VkDevice _device, VkPipeline _pipeline, 234bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 235bf215546Sopenharmony_ci{ 236bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 237bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline, pipeline, 
_pipeline); 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci if (!_pipeline) 240bf215546Sopenharmony_ci return; 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci radv_pipeline_destroy(device, pipeline, pAllocator); 243bf215546Sopenharmony_ci} 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ciuint32_t 246bf215546Sopenharmony_ciradv_get_hash_flags(const struct radv_device *device, bool stats) 247bf215546Sopenharmony_ci{ 248bf215546Sopenharmony_ci uint32_t hash_flags = 0; 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci if (device->physical_device->use_ngg_culling) 251bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_USE_NGG_CULLING; 252bf215546Sopenharmony_ci if (device->instance->perftest_flags & RADV_PERFTEST_EMULATE_RT) 253bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_EMULATE_RT; 254bf215546Sopenharmony_ci if (device->physical_device->rt_wave_size == 64) 255bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_RT_WAVE64; 256bf215546Sopenharmony_ci if (device->physical_device->cs_wave_size == 32) 257bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_CS_WAVE32; 258bf215546Sopenharmony_ci if (device->physical_device->ps_wave_size == 32) 259bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_PS_WAVE32; 260bf215546Sopenharmony_ci if (device->physical_device->ge_wave_size == 32) 261bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_GE_WAVE32; 262bf215546Sopenharmony_ci if (device->physical_device->use_llvm) 263bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_LLVM; 264bf215546Sopenharmony_ci if (stats) 265bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_KEEP_STATISTICS; 266bf215546Sopenharmony_ci if (device->robust_buffer_access) /* forces per-attribute vertex descriptors */ 267bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS; 268bf215546Sopenharmony_ci if (device->robust_buffer_access2) /* affects load/store vectorizer */ 269bf215546Sopenharmony_ci hash_flags |= 
RADV_HASH_SHADER_ROBUST_BUFFER_ACCESS2; 270bf215546Sopenharmony_ci if (device->instance->debug_flags & RADV_DEBUG_SPLIT_FMA) 271bf215546Sopenharmony_ci hash_flags |= RADV_HASH_SHADER_SPLIT_FMA; 272bf215546Sopenharmony_ci return hash_flags; 273bf215546Sopenharmony_ci} 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_cistatic void 276bf215546Sopenharmony_ciradv_pipeline_init_scratch(const struct radv_device *device, struct radv_pipeline *pipeline) 277bf215546Sopenharmony_ci{ 278bf215546Sopenharmony_ci unsigned scratch_bytes_per_wave = 0; 279bf215546Sopenharmony_ci unsigned max_waves = 0; 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 282bf215546Sopenharmony_ci if (pipeline->shaders[i] && pipeline->shaders[i]->config.scratch_bytes_per_wave) { 283bf215546Sopenharmony_ci unsigned max_stage_waves = device->scratch_waves; 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci scratch_bytes_per_wave = 286bf215546Sopenharmony_ci MAX2(scratch_bytes_per_wave, pipeline->shaders[i]->config.scratch_bytes_per_wave); 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci max_stage_waves = 289bf215546Sopenharmony_ci MIN2(max_stage_waves, 4 * device->physical_device->rad_info.num_cu * 290bf215546Sopenharmony_ci radv_get_max_waves(device, pipeline->shaders[i], i)); 291bf215546Sopenharmony_ci max_waves = MAX2(max_waves, max_stage_waves); 292bf215546Sopenharmony_ci } 293bf215546Sopenharmony_ci } 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci pipeline->scratch_bytes_per_wave = scratch_bytes_per_wave; 296bf215546Sopenharmony_ci pipeline->max_waves = max_waves; 297bf215546Sopenharmony_ci} 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_cistatic uint32_t 300bf215546Sopenharmony_cisi_translate_blend_function(VkBlendOp op) 301bf215546Sopenharmony_ci{ 302bf215546Sopenharmony_ci switch (op) { 303bf215546Sopenharmony_ci case VK_BLEND_OP_ADD: 304bf215546Sopenharmony_ci return V_028780_COMB_DST_PLUS_SRC; 
305bf215546Sopenharmony_ci case VK_BLEND_OP_SUBTRACT: 306bf215546Sopenharmony_ci return V_028780_COMB_SRC_MINUS_DST; 307bf215546Sopenharmony_ci case VK_BLEND_OP_REVERSE_SUBTRACT: 308bf215546Sopenharmony_ci return V_028780_COMB_DST_MINUS_SRC; 309bf215546Sopenharmony_ci case VK_BLEND_OP_MIN: 310bf215546Sopenharmony_ci return V_028780_COMB_MIN_DST_SRC; 311bf215546Sopenharmony_ci case VK_BLEND_OP_MAX: 312bf215546Sopenharmony_ci return V_028780_COMB_MAX_DST_SRC; 313bf215546Sopenharmony_ci default: 314bf215546Sopenharmony_ci return 0; 315bf215546Sopenharmony_ci } 316bf215546Sopenharmony_ci} 317bf215546Sopenharmony_ci 318bf215546Sopenharmony_cistatic uint32_t 319bf215546Sopenharmony_cisi_translate_blend_factor(enum amd_gfx_level gfx_level, VkBlendFactor factor) 320bf215546Sopenharmony_ci{ 321bf215546Sopenharmony_ci switch (factor) { 322bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ZERO: 323bf215546Sopenharmony_ci return V_028780_BLEND_ZERO; 324bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE: 325bf215546Sopenharmony_ci return V_028780_BLEND_ONE; 326bf215546Sopenharmony_ci case VK_BLEND_FACTOR_SRC_COLOR: 327bf215546Sopenharmony_ci return V_028780_BLEND_SRC_COLOR; 328bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR: 329bf215546Sopenharmony_ci return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 330bf215546Sopenharmony_ci case VK_BLEND_FACTOR_DST_COLOR: 331bf215546Sopenharmony_ci return V_028780_BLEND_DST_COLOR; 332bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR: 333bf215546Sopenharmony_ci return V_028780_BLEND_ONE_MINUS_DST_COLOR; 334bf215546Sopenharmony_ci case VK_BLEND_FACTOR_SRC_ALPHA: 335bf215546Sopenharmony_ci return V_028780_BLEND_SRC_ALPHA; 336bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA: 337bf215546Sopenharmony_ci return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 338bf215546Sopenharmony_ci case VK_BLEND_FACTOR_DST_ALPHA: 339bf215546Sopenharmony_ci return V_028780_BLEND_DST_ALPHA; 340bf215546Sopenharmony_ci case 
VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: 341bf215546Sopenharmony_ci return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 342bf215546Sopenharmony_ci case VK_BLEND_FACTOR_CONSTANT_COLOR: 343bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11 344bf215546Sopenharmony_ci : V_028780_BLEND_CONSTANT_COLOR_GFX6; 345bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR: 346bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11 347bf215546Sopenharmony_ci : V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6; 348bf215546Sopenharmony_ci case VK_BLEND_FACTOR_CONSTANT_ALPHA: 349bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_ALPHA_GFX11 350bf215546Sopenharmony_ci : V_028780_BLEND_CONSTANT_ALPHA_GFX6; 351bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA: 352bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11 353bf215546Sopenharmony_ci : V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6; 354bf215546Sopenharmony_ci case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE: 355bf215546Sopenharmony_ci return V_028780_BLEND_SRC_ALPHA_SATURATE; 356bf215546Sopenharmony_ci case VK_BLEND_FACTOR_SRC1_COLOR: 357bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_COLOR_GFX11 : V_028780_BLEND_SRC1_COLOR_GFX6; 358bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: 359bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11 360bf215546Sopenharmony_ci : V_028780_BLEND_INV_SRC1_COLOR_GFX6; 361bf215546Sopenharmony_ci case VK_BLEND_FACTOR_SRC1_ALPHA: 362bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_ALPHA_GFX11 : V_028780_BLEND_SRC1_ALPHA_GFX6; 363bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: 364bf215546Sopenharmony_ci return gfx_level >= GFX11 ? 
V_028780_BLEND_INV_SRC1_ALPHA_GFX11 365bf215546Sopenharmony_ci : V_028780_BLEND_INV_SRC1_ALPHA_GFX6; 366bf215546Sopenharmony_ci default: 367bf215546Sopenharmony_ci return 0; 368bf215546Sopenharmony_ci } 369bf215546Sopenharmony_ci} 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_cistatic uint32_t 372bf215546Sopenharmony_cisi_translate_blend_opt_function(unsigned op) 373bf215546Sopenharmony_ci{ 374bf215546Sopenharmony_ci switch (op) { 375bf215546Sopenharmony_ci case V_028780_COMB_DST_PLUS_SRC: 376bf215546Sopenharmony_ci return V_028760_OPT_COMB_ADD; 377bf215546Sopenharmony_ci case V_028780_COMB_SRC_MINUS_DST: 378bf215546Sopenharmony_ci return V_028760_OPT_COMB_SUBTRACT; 379bf215546Sopenharmony_ci case V_028780_COMB_DST_MINUS_SRC: 380bf215546Sopenharmony_ci return V_028760_OPT_COMB_REVSUBTRACT; 381bf215546Sopenharmony_ci case V_028780_COMB_MIN_DST_SRC: 382bf215546Sopenharmony_ci return V_028760_OPT_COMB_MIN; 383bf215546Sopenharmony_ci case V_028780_COMB_MAX_DST_SRC: 384bf215546Sopenharmony_ci return V_028760_OPT_COMB_MAX; 385bf215546Sopenharmony_ci default: 386bf215546Sopenharmony_ci return V_028760_OPT_COMB_BLEND_DISABLED; 387bf215546Sopenharmony_ci } 388bf215546Sopenharmony_ci} 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_cistatic uint32_t 391bf215546Sopenharmony_cisi_translate_blend_opt_factor(unsigned factor, bool is_alpha) 392bf215546Sopenharmony_ci{ 393bf215546Sopenharmony_ci switch (factor) { 394bf215546Sopenharmony_ci case V_028780_BLEND_ZERO: 395bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 396bf215546Sopenharmony_ci case V_028780_BLEND_ONE: 397bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 398bf215546Sopenharmony_ci case V_028780_BLEND_SRC_COLOR: 399bf215546Sopenharmony_ci return is_alpha ? 
V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 400bf215546Sopenharmony_ci : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 401bf215546Sopenharmony_ci case V_028780_BLEND_ONE_MINUS_SRC_COLOR: 402bf215546Sopenharmony_ci return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 403bf215546Sopenharmony_ci : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 404bf215546Sopenharmony_ci case V_028780_BLEND_SRC_ALPHA: 405bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 406bf215546Sopenharmony_ci case V_028780_BLEND_ONE_MINUS_SRC_ALPHA: 407bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 408bf215546Sopenharmony_ci case V_028780_BLEND_SRC_ALPHA_SATURATE: 409bf215546Sopenharmony_ci return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 410bf215546Sopenharmony_ci : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 411bf215546Sopenharmony_ci default: 412bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 413bf215546Sopenharmony_ci } 414bf215546Sopenharmony_ci} 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci/** 417bf215546Sopenharmony_ci * Get rid of DST in the blend factors by commuting the operands: 418bf215546Sopenharmony_ci * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 419bf215546Sopenharmony_ci */ 420bf215546Sopenharmony_cistatic void 421bf215546Sopenharmony_cisi_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor, 422bf215546Sopenharmony_ci unsigned expected_dst, unsigned replacement_src) 423bf215546Sopenharmony_ci{ 424bf215546Sopenharmony_ci if (*src_factor == expected_dst && *dst_factor == V_028780_BLEND_ZERO) { 425bf215546Sopenharmony_ci *src_factor = V_028780_BLEND_ZERO; 426bf215546Sopenharmony_ci *dst_factor = replacement_src; 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci /* Commuting the operands requires reversing subtractions. 
*/ 429bf215546Sopenharmony_ci if (*func == V_028780_COMB_SRC_MINUS_DST) 430bf215546Sopenharmony_ci *func = V_028780_COMB_DST_MINUS_SRC; 431bf215546Sopenharmony_ci else if (*func == V_028780_COMB_DST_MINUS_SRC) 432bf215546Sopenharmony_ci *func = V_028780_COMB_SRC_MINUS_DST; 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci} 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_cistatic bool 437bf215546Sopenharmony_cisi_blend_factor_uses_dst(unsigned factor) 438bf215546Sopenharmony_ci{ 439bf215546Sopenharmony_ci return factor == V_028780_BLEND_DST_COLOR || 440bf215546Sopenharmony_ci factor == V_028780_BLEND_DST_ALPHA || 441bf215546Sopenharmony_ci factor == V_028780_BLEND_SRC_ALPHA_SATURATE || 442bf215546Sopenharmony_ci factor == V_028780_BLEND_ONE_MINUS_DST_ALPHA || 443bf215546Sopenharmony_ci factor == V_028780_BLEND_ONE_MINUS_DST_COLOR; 444bf215546Sopenharmony_ci} 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_cistatic bool 447bf215546Sopenharmony_ciis_dual_src(enum amd_gfx_level gfx_level, unsigned factor) 448bf215546Sopenharmony_ci{ 449bf215546Sopenharmony_ci if (gfx_level >= GFX11) { 450bf215546Sopenharmony_ci switch (factor) { 451bf215546Sopenharmony_ci case V_028780_BLEND_SRC1_COLOR_GFX11: 452bf215546Sopenharmony_ci case V_028780_BLEND_INV_SRC1_COLOR_GFX11: 453bf215546Sopenharmony_ci case V_028780_BLEND_SRC1_ALPHA_GFX11: 454bf215546Sopenharmony_ci case V_028780_BLEND_INV_SRC1_ALPHA_GFX11: 455bf215546Sopenharmony_ci return true; 456bf215546Sopenharmony_ci default: 457bf215546Sopenharmony_ci return false; 458bf215546Sopenharmony_ci } 459bf215546Sopenharmony_ci } else { 460bf215546Sopenharmony_ci switch (factor) { 461bf215546Sopenharmony_ci case V_028780_BLEND_SRC1_COLOR_GFX6: 462bf215546Sopenharmony_ci case V_028780_BLEND_INV_SRC1_COLOR_GFX6: 463bf215546Sopenharmony_ci case V_028780_BLEND_SRC1_ALPHA_GFX6: 464bf215546Sopenharmony_ci case V_028780_BLEND_INV_SRC1_ALPHA_GFX6: 465bf215546Sopenharmony_ci return true; 466bf215546Sopenharmony_ci default: 
467bf215546Sopenharmony_ci return false; 468bf215546Sopenharmony_ci } 469bf215546Sopenharmony_ci } 470bf215546Sopenharmony_ci} 471bf215546Sopenharmony_ci 472bf215546Sopenharmony_cistatic unsigned 473bf215546Sopenharmony_ciradv_choose_spi_color_format(const struct radv_device *device, VkFormat vk_format, 474bf215546Sopenharmony_ci bool blend_enable, bool blend_need_alpha) 475bf215546Sopenharmony_ci{ 476bf215546Sopenharmony_ci const struct util_format_description *desc = vk_format_description(vk_format); 477bf215546Sopenharmony_ci bool use_rbplus = device->physical_device->rad_info.rbplus_allowed; 478bf215546Sopenharmony_ci struct ac_spi_color_formats formats = {0}; 479bf215546Sopenharmony_ci unsigned format, ntype, swap; 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci format = radv_translate_colorformat(vk_format); 482bf215546Sopenharmony_ci ntype = radv_translate_color_numformat(vk_format, desc, 483bf215546Sopenharmony_ci vk_format_get_first_non_void_channel(vk_format)); 484bf215546Sopenharmony_ci swap = radv_translate_colorswap(vk_format, false); 485bf215546Sopenharmony_ci 486bf215546Sopenharmony_ci ac_choose_spi_color_formats(format, swap, ntype, false, use_rbplus, &formats); 487bf215546Sopenharmony_ci 488bf215546Sopenharmony_ci if (blend_enable && blend_need_alpha) 489bf215546Sopenharmony_ci return formats.blend_alpha; 490bf215546Sopenharmony_ci else if (blend_need_alpha) 491bf215546Sopenharmony_ci return formats.alpha; 492bf215546Sopenharmony_ci else if (blend_enable) 493bf215546Sopenharmony_ci return formats.blend; 494bf215546Sopenharmony_ci else 495bf215546Sopenharmony_ci return formats.normal; 496bf215546Sopenharmony_ci} 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_cistatic bool 499bf215546Sopenharmony_ciformat_is_int8(VkFormat format) 500bf215546Sopenharmony_ci{ 501bf215546Sopenharmony_ci const struct util_format_description *desc = vk_format_description(format); 502bf215546Sopenharmony_ci int channel = 
vk_format_get_first_non_void_channel(format); 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci return channel >= 0 && desc->channel[channel].pure_integer && desc->channel[channel].size == 8; 505bf215546Sopenharmony_ci} 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_cistatic bool 508bf215546Sopenharmony_ciformat_is_int10(VkFormat format) 509bf215546Sopenharmony_ci{ 510bf215546Sopenharmony_ci const struct util_format_description *desc = vk_format_description(format); 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci if (desc->nr_channels != 4) 513bf215546Sopenharmony_ci return false; 514bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 515bf215546Sopenharmony_ci if (desc->channel[i].pure_integer && desc->channel[i].size == 10) 516bf215546Sopenharmony_ci return true; 517bf215546Sopenharmony_ci } 518bf215546Sopenharmony_ci return false; 519bf215546Sopenharmony_ci} 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_cistatic bool 522bf215546Sopenharmony_ciformat_is_float32(VkFormat format) 523bf215546Sopenharmony_ci{ 524bf215546Sopenharmony_ci const struct util_format_description *desc = vk_format_description(format); 525bf215546Sopenharmony_ci int channel = vk_format_get_first_non_void_channel(format); 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci return channel >= 0 && 528bf215546Sopenharmony_ci desc->channel[channel].type == UTIL_FORMAT_TYPE_FLOAT && desc->channel[channel].size == 32; 529bf215546Sopenharmony_ci} 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_cistatic void 532bf215546Sopenharmony_ciradv_pipeline_compute_spi_color_formats(const struct radv_graphics_pipeline *pipeline, 533bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 534bf215546Sopenharmony_ci struct radv_blend_state *blend, 535bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 536bf215546Sopenharmony_ci{ 537bf215546Sopenharmony_ci unsigned col_format = 0, is_int8 = 0, is_int10 = 0, is_float32 = 0; 
538bf215546Sopenharmony_ci unsigned num_targets; 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci for (unsigned i = 0; i < info->ri.color_att_count; ++i) { 541bf215546Sopenharmony_ci unsigned cf; 542bf215546Sopenharmony_ci VkFormat fmt = info->ri.color_att_formats[i]; 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci if (fmt == VK_FORMAT_UNDEFINED || !(blend->cb_target_mask & (0xfu << (i * 4)))) { 545bf215546Sopenharmony_ci cf = V_028714_SPI_SHADER_ZERO; 546bf215546Sopenharmony_ci } else { 547bf215546Sopenharmony_ci bool blend_enable = blend->blend_enable_4bit & (0xfu << (i * 4)); 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci cf = radv_choose_spi_color_format(pipeline->base.device, fmt, blend_enable, 550bf215546Sopenharmony_ci blend->need_src_alpha & (1 << i)); 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci if (format_is_int8(fmt)) 553bf215546Sopenharmony_ci is_int8 |= 1 << i; 554bf215546Sopenharmony_ci if (format_is_int10(fmt)) 555bf215546Sopenharmony_ci is_int10 |= 1 << i; 556bf215546Sopenharmony_ci if (format_is_float32(fmt)) 557bf215546Sopenharmony_ci is_float32 |= 1 << i; 558bf215546Sopenharmony_ci } 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci col_format |= cf << (4 * i); 561bf215546Sopenharmony_ci } 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci if (!(col_format & 0xf) && blend->need_src_alpha & (1 << 0)) { 564bf215546Sopenharmony_ci /* When a subpass doesn't have any color attachments, write the 565bf215546Sopenharmony_ci * alpha channel of MRT0 when alpha coverage is enabled because 566bf215546Sopenharmony_ci * the depth attachment needs it. 567bf215546Sopenharmony_ci */ 568bf215546Sopenharmony_ci col_format |= V_028714_SPI_SHADER_32_AR; 569bf215546Sopenharmony_ci } 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci /* If the i-th target format is set, all previous target formats must 572bf215546Sopenharmony_ci * be non-zero to avoid hangs. 
573bf215546Sopenharmony_ci */ 574bf215546Sopenharmony_ci num_targets = (util_last_bit(col_format) + 3) / 4; 575bf215546Sopenharmony_ci for (unsigned i = 0; i < num_targets; i++) { 576bf215546Sopenharmony_ci if (!(col_format & (0xfu << (i * 4)))) { 577bf215546Sopenharmony_ci col_format |= V_028714_SPI_SHADER_32_R << (i * 4); 578bf215546Sopenharmony_ci } 579bf215546Sopenharmony_ci } 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_ci /* The output for dual source blending should have the same format as 582bf215546Sopenharmony_ci * the first output. 583bf215546Sopenharmony_ci */ 584bf215546Sopenharmony_ci if (blend->mrt0_is_dual_src) { 585bf215546Sopenharmony_ci assert(!(col_format >> 4)); 586bf215546Sopenharmony_ci col_format |= (col_format & 0xf) << 4; 587bf215546Sopenharmony_ci } 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci blend->cb_shader_mask = ac_get_cb_shader_mask(col_format); 590bf215546Sopenharmony_ci blend->spi_shader_col_format = col_format; 591bf215546Sopenharmony_ci blend->col_format_is_int8 = is_int8; 592bf215546Sopenharmony_ci blend->col_format_is_int10 = is_int10; 593bf215546Sopenharmony_ci blend->col_format_is_float32 = is_float32; 594bf215546Sopenharmony_ci} 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci/* 597bf215546Sopenharmony_ci * Ordered so that for each i, 598bf215546Sopenharmony_ci * radv_format_meta_fs_key(radv_fs_key_format_exemplars[i]) == i. 
599bf215546Sopenharmony_ci */ 600bf215546Sopenharmony_ciconst VkFormat radv_fs_key_format_exemplars[NUM_META_FS_KEYS] = { 601bf215546Sopenharmony_ci VK_FORMAT_R32_SFLOAT, 602bf215546Sopenharmony_ci VK_FORMAT_R32G32_SFLOAT, 603bf215546Sopenharmony_ci VK_FORMAT_R8G8B8A8_UNORM, 604bf215546Sopenharmony_ci VK_FORMAT_R16G16B16A16_UNORM, 605bf215546Sopenharmony_ci VK_FORMAT_R16G16B16A16_SNORM, 606bf215546Sopenharmony_ci VK_FORMAT_R16G16B16A16_UINT, 607bf215546Sopenharmony_ci VK_FORMAT_R16G16B16A16_SINT, 608bf215546Sopenharmony_ci VK_FORMAT_R32G32B32A32_SFLOAT, 609bf215546Sopenharmony_ci VK_FORMAT_R8G8B8A8_UINT, 610bf215546Sopenharmony_ci VK_FORMAT_R8G8B8A8_SINT, 611bf215546Sopenharmony_ci VK_FORMAT_A2R10G10B10_UINT_PACK32, 612bf215546Sopenharmony_ci VK_FORMAT_A2R10G10B10_SINT_PACK32, 613bf215546Sopenharmony_ci}; 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ciunsigned 616bf215546Sopenharmony_ciradv_format_meta_fs_key(struct radv_device *device, VkFormat format) 617bf215546Sopenharmony_ci{ 618bf215546Sopenharmony_ci unsigned col_format = radv_choose_spi_color_format(device, format, false, false); 619bf215546Sopenharmony_ci assert(col_format != V_028714_SPI_SHADER_32_AR); 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci bool is_int8 = format_is_int8(format); 622bf215546Sopenharmony_ci bool is_int10 = format_is_int10(format); 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int8) 625bf215546Sopenharmony_ci return 8; 626bf215546Sopenharmony_ci else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int8) 627bf215546Sopenharmony_ci return 9; 628bf215546Sopenharmony_ci else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR && is_int10) 629bf215546Sopenharmony_ci return 10; 630bf215546Sopenharmony_ci else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR && is_int10) 631bf215546Sopenharmony_ci return 11; 632bf215546Sopenharmony_ci else { 633bf215546Sopenharmony_ci if (col_format >= 
V_028714_SPI_SHADER_32_AR) 634bf215546Sopenharmony_ci --col_format; /* Skip V_028714_SPI_SHADER_32_AR since there is no such VkFormat */ 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci --col_format; /* Skip V_028714_SPI_SHADER_ZERO */ 637bf215546Sopenharmony_ci return col_format; 638bf215546Sopenharmony_ci } 639bf215546Sopenharmony_ci} 640bf215546Sopenharmony_ci 641bf215546Sopenharmony_cistatic void 642bf215546Sopenharmony_ciradv_blend_check_commutativity(enum amd_gfx_level gfx_level, struct radv_blend_state *blend, 643bf215546Sopenharmony_ci unsigned op, unsigned src, unsigned dst, unsigned chanmask) 644bf215546Sopenharmony_ci{ 645bf215546Sopenharmony_ci bool is_src_allowed = false; 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_ci /* Src factor is allowed when it does not depend on Dst. */ 648bf215546Sopenharmony_ci if (src == V_028780_BLEND_ZERO || 649bf215546Sopenharmony_ci src == V_028780_BLEND_ONE || 650bf215546Sopenharmony_ci src == V_028780_BLEND_SRC_COLOR || 651bf215546Sopenharmony_ci src == V_028780_BLEND_SRC_ALPHA || 652bf215546Sopenharmony_ci src == V_028780_BLEND_SRC_ALPHA_SATURATE || 653bf215546Sopenharmony_ci src == V_028780_BLEND_ONE_MINUS_SRC_COLOR || 654bf215546Sopenharmony_ci src == V_028780_BLEND_ONE_MINUS_SRC_ALPHA) { 655bf215546Sopenharmony_ci is_src_allowed = true; 656bf215546Sopenharmony_ci } 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci if (gfx_level >= GFX11) { 659bf215546Sopenharmony_ci if (src == V_028780_BLEND_CONSTANT_COLOR_GFX11 || 660bf215546Sopenharmony_ci src == V_028780_BLEND_CONSTANT_ALPHA_GFX11 || 661bf215546Sopenharmony_ci src == V_028780_BLEND_SRC1_COLOR_GFX11 || 662bf215546Sopenharmony_ci src == V_028780_BLEND_SRC1_ALPHA_GFX11 || 663bf215546Sopenharmony_ci src == V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11 || 664bf215546Sopenharmony_ci src == V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11 || 665bf215546Sopenharmony_ci src == V_028780_BLEND_INV_SRC1_COLOR_GFX11 || 666bf215546Sopenharmony_ci src == 
V_028780_BLEND_INV_SRC1_ALPHA_GFX11) { 667bf215546Sopenharmony_ci is_src_allowed = true; 668bf215546Sopenharmony_ci } 669bf215546Sopenharmony_ci } else { 670bf215546Sopenharmony_ci if (src == V_028780_BLEND_CONSTANT_COLOR_GFX6 || 671bf215546Sopenharmony_ci src == V_028780_BLEND_CONSTANT_ALPHA_GFX6 || 672bf215546Sopenharmony_ci src == V_028780_BLEND_SRC1_COLOR_GFX6 || 673bf215546Sopenharmony_ci src == V_028780_BLEND_SRC1_ALPHA_GFX6 || 674bf215546Sopenharmony_ci src == V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6 || 675bf215546Sopenharmony_ci src == V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6 || 676bf215546Sopenharmony_ci src == V_028780_BLEND_INV_SRC1_COLOR_GFX6 || 677bf215546Sopenharmony_ci src == V_028780_BLEND_INV_SRC1_ALPHA_GFX6) { 678bf215546Sopenharmony_ci is_src_allowed = true; 679bf215546Sopenharmony_ci } 680bf215546Sopenharmony_ci } 681bf215546Sopenharmony_ci 682bf215546Sopenharmony_ci if (dst == V_028780_BLEND_ONE && is_src_allowed) { 683bf215546Sopenharmony_ci /* Addition is commutative, but floating point addition isn't 684bf215546Sopenharmony_ci * associative: subtle changes can be introduced via different 685bf215546Sopenharmony_ci * rounding. Be conservative, only enable for min and max. 
686bf215546Sopenharmony_ci */ 687bf215546Sopenharmony_ci if (op == V_028780_COMB_MAX_DST_SRC || op == V_028780_COMB_MIN_DST_SRC) 688bf215546Sopenharmony_ci blend->commutative_4bit |= chanmask; 689bf215546Sopenharmony_ci } 690bf215546Sopenharmony_ci} 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_cistatic struct radv_blend_state 693bf215546Sopenharmony_ciradv_pipeline_init_blend_state(struct radv_graphics_pipeline *pipeline, 694bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 695bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 696bf215546Sopenharmony_ci{ 697bf215546Sopenharmony_ci const struct radv_device *device = pipeline->base.device; 698bf215546Sopenharmony_ci struct radv_blend_state blend = {0}; 699bf215546Sopenharmony_ci unsigned cb_color_control = 0; 700bf215546Sopenharmony_ci const enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; 701bf215546Sopenharmony_ci int i; 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci if (info->cb.logic_op_enable) 704bf215546Sopenharmony_ci cb_color_control |= S_028808_ROP3(info->cb.logic_op); 705bf215546Sopenharmony_ci else 706bf215546Sopenharmony_ci cb_color_control |= S_028808_ROP3(V_028808_ROP3_COPY); 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci if (device->instance->debug_flags & RADV_DEBUG_NO_ATOC_DITHERING) 709bf215546Sopenharmony_ci { 710bf215546Sopenharmony_ci blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 711bf215546Sopenharmony_ci S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 712bf215546Sopenharmony_ci S_028B70_OFFSET_ROUND(0); 713bf215546Sopenharmony_ci } 714bf215546Sopenharmony_ci else 715bf215546Sopenharmony_ci { 716bf215546Sopenharmony_ci blend.db_alpha_to_mask = S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) | 717bf215546Sopenharmony_ci S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 
718bf215546Sopenharmony_ci S_028B70_OFFSET_ROUND(1); 719bf215546Sopenharmony_ci } 720bf215546Sopenharmony_ci 721bf215546Sopenharmony_ci if (info->ms.alpha_to_coverage_enable) { 722bf215546Sopenharmony_ci blend.db_alpha_to_mask |= S_028B70_ALPHA_TO_MASK_ENABLE(1); 723bf215546Sopenharmony_ci blend.need_src_alpha |= 0x1; 724bf215546Sopenharmony_ci } 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_ci blend.cb_target_mask = 0; 727bf215546Sopenharmony_ci for (i = 0; i < info->cb.att_count; i++) { 728bf215546Sopenharmony_ci unsigned blend_cntl = 0; 729bf215546Sopenharmony_ci unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 730bf215546Sopenharmony_ci unsigned eqRGB = info->cb.att[i].color_blend_op; 731bf215546Sopenharmony_ci unsigned srcRGB = info->cb.att[i].src_color_blend_factor; 732bf215546Sopenharmony_ci unsigned dstRGB = info->cb.att[i].dst_color_blend_factor; 733bf215546Sopenharmony_ci unsigned eqA = info->cb.att[i].alpha_blend_op; 734bf215546Sopenharmony_ci unsigned srcA = info->cb.att[i].src_alpha_blend_factor; 735bf215546Sopenharmony_ci unsigned dstA = info->cb.att[i].dst_alpha_blend_factor; 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 738bf215546Sopenharmony_ci S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci if (!info->cb.att[i].color_write_mask) 741bf215546Sopenharmony_ci continue; 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci /* Ignore other blend targets if dual-source blending 744bf215546Sopenharmony_ci * is enabled to prevent wrong behaviour. 
745bf215546Sopenharmony_ci */ 746bf215546Sopenharmony_ci if (blend.mrt0_is_dual_src) 747bf215546Sopenharmony_ci continue; 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci blend.cb_target_mask |= (unsigned)info->cb.att[i].color_write_mask << (4 * i); 750bf215546Sopenharmony_ci blend.cb_target_enabled_4bit |= 0xfu << (4 * i); 751bf215546Sopenharmony_ci if (!info->cb.att[i].blend_enable) { 752bf215546Sopenharmony_ci blend.cb_blend_control[i] = blend_cntl; 753bf215546Sopenharmony_ci continue; 754bf215546Sopenharmony_ci } 755bf215546Sopenharmony_ci 756bf215546Sopenharmony_ci if (is_dual_src(gfx_level, srcRGB) || is_dual_src(gfx_level, dstRGB) || 757bf215546Sopenharmony_ci is_dual_src(gfx_level, srcA) || is_dual_src(gfx_level, dstA)) 758bf215546Sopenharmony_ci if (i == 0) 759bf215546Sopenharmony_ci blend.mrt0_is_dual_src = true; 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci 762bf215546Sopenharmony_ci if (eqRGB == V_028780_COMB_MIN_DST_SRC || eqRGB == V_028780_COMB_MAX_DST_SRC) { 763bf215546Sopenharmony_ci srcRGB = V_028780_BLEND_ONE; 764bf215546Sopenharmony_ci dstRGB = V_028780_BLEND_ONE; 765bf215546Sopenharmony_ci } 766bf215546Sopenharmony_ci if (eqA == V_028780_COMB_MIN_DST_SRC || eqA == V_028780_COMB_MAX_DST_SRC) { 767bf215546Sopenharmony_ci srcA = V_028780_BLEND_ONE; 768bf215546Sopenharmony_ci dstA = V_028780_BLEND_ONE; 769bf215546Sopenharmony_ci } 770bf215546Sopenharmony_ci 771bf215546Sopenharmony_ci radv_blend_check_commutativity(gfx_level, &blend, eqRGB, srcRGB, dstRGB, 0x7u << (4 * i)); 772bf215546Sopenharmony_ci radv_blend_check_commutativity(gfx_level, &blend, eqA, srcA, dstA, 0x8u << (4 * i)); 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_ci /* Blending optimizations for RB+. 775bf215546Sopenharmony_ci * These transformations don't change the behavior. 
776bf215546Sopenharmony_ci * 777bf215546Sopenharmony_ci * First, get rid of DST in the blend factors: 778bf215546Sopenharmony_ci * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 779bf215546Sopenharmony_ci */ 780bf215546Sopenharmony_ci si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, V_028780_BLEND_DST_COLOR, 781bf215546Sopenharmony_ci V_028780_BLEND_SRC_COLOR); 782bf215546Sopenharmony_ci 783bf215546Sopenharmony_ci si_blend_remove_dst(&eqA, &srcA, &dstA, V_028780_BLEND_DST_COLOR, 784bf215546Sopenharmony_ci V_028780_BLEND_SRC_COLOR); 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci si_blend_remove_dst(&eqA, &srcA, &dstA, V_028780_BLEND_DST_ALPHA, 787bf215546Sopenharmony_ci V_028780_BLEND_SRC_ALPHA); 788bf215546Sopenharmony_ci 789bf215546Sopenharmony_ci /* Look up the ideal settings from tables. */ 790bf215546Sopenharmony_ci srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 791bf215546Sopenharmony_ci dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 792bf215546Sopenharmony_ci srcA_opt = si_translate_blend_opt_factor(srcA, true); 793bf215546Sopenharmony_ci dstA_opt = si_translate_blend_opt_factor(dstA, true); 794bf215546Sopenharmony_ci 795bf215546Sopenharmony_ci /* Handle interdependencies. */ 796bf215546Sopenharmony_ci if (si_blend_factor_uses_dst(srcRGB)) 797bf215546Sopenharmony_ci dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 798bf215546Sopenharmony_ci if (si_blend_factor_uses_dst(srcA)) 799bf215546Sopenharmony_ci dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 800bf215546Sopenharmony_ci 801bf215546Sopenharmony_ci if (srcRGB == V_028780_BLEND_SRC_ALPHA_SATURATE && 802bf215546Sopenharmony_ci (dstRGB == V_028780_BLEND_ZERO || dstRGB == V_028780_BLEND_SRC_ALPHA || 803bf215546Sopenharmony_ci dstRGB == V_028780_BLEND_SRC_ALPHA_SATURATE)) 804bf215546Sopenharmony_ci dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci /* Set the final value. 
*/ 807bf215546Sopenharmony_ci blend.sx_mrt_blend_opt[i] = 808bf215546Sopenharmony_ci S_028760_COLOR_SRC_OPT(srcRGB_opt) | S_028760_COLOR_DST_OPT(dstRGB_opt) | 809bf215546Sopenharmony_ci S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 810bf215546Sopenharmony_ci S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) | 811bf215546Sopenharmony_ci S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 812bf215546Sopenharmony_ci blend_cntl |= S_028780_ENABLE(1); 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci blend_cntl |= S_028780_COLOR_COMB_FCN(eqRGB); 815bf215546Sopenharmony_ci blend_cntl |= S_028780_COLOR_SRCBLEND(srcRGB); 816bf215546Sopenharmony_ci blend_cntl |= S_028780_COLOR_DESTBLEND(dstRGB); 817bf215546Sopenharmony_ci if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 818bf215546Sopenharmony_ci blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 819bf215546Sopenharmony_ci blend_cntl |= S_028780_ALPHA_COMB_FCN(eqA); 820bf215546Sopenharmony_ci blend_cntl |= S_028780_ALPHA_SRCBLEND(srcA); 821bf215546Sopenharmony_ci blend_cntl |= S_028780_ALPHA_DESTBLEND(dstA); 822bf215546Sopenharmony_ci } 823bf215546Sopenharmony_ci blend.cb_blend_control[i] = blend_cntl; 824bf215546Sopenharmony_ci 825bf215546Sopenharmony_ci blend.blend_enable_4bit |= 0xfu << (i * 4); 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci if (srcRGB == V_028780_BLEND_SRC_ALPHA || dstRGB == V_028780_BLEND_SRC_ALPHA || 828bf215546Sopenharmony_ci srcRGB == V_028780_BLEND_SRC_ALPHA_SATURATE || 829bf215546Sopenharmony_ci dstRGB == V_028780_BLEND_SRC_ALPHA_SATURATE || 830bf215546Sopenharmony_ci srcRGB == V_028780_BLEND_ONE_MINUS_SRC_ALPHA || 831bf215546Sopenharmony_ci dstRGB == V_028780_BLEND_ONE_MINUS_SRC_ALPHA) 832bf215546Sopenharmony_ci blend.need_src_alpha |= 1 << i; 833bf215546Sopenharmony_ci } 834bf215546Sopenharmony_ci for (i = info->cb.att_count; i < 8; i++) { 835bf215546Sopenharmony_ci blend.cb_blend_control[i] = 0; 836bf215546Sopenharmony_ci 
blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 837bf215546Sopenharmony_ci S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 838bf215546Sopenharmony_ci } 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci if (device->physical_device->rad_info.has_rbplus) { 841bf215546Sopenharmony_ci /* Disable RB+ blend optimizations for dual source blending. */ 842bf215546Sopenharmony_ci if (blend.mrt0_is_dual_src) { 843bf215546Sopenharmony_ci for (i = 0; i < 8; i++) { 844bf215546Sopenharmony_ci blend.sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 845bf215546Sopenharmony_ci S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 846bf215546Sopenharmony_ci } 847bf215546Sopenharmony_ci } 848bf215546Sopenharmony_ci 849bf215546Sopenharmony_ci /* RB+ doesn't work with dual source blending, logic op and 850bf215546Sopenharmony_ci * RESOLVE. 851bf215546Sopenharmony_ci */ 852bf215546Sopenharmony_ci if (blend.mrt0_is_dual_src || info->cb.logic_op_enable || 853bf215546Sopenharmony_ci (device->physical_device->rad_info.gfx_level >= GFX11 && blend.blend_enable_4bit)) 854bf215546Sopenharmony_ci cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1); 855bf215546Sopenharmony_ci } 856bf215546Sopenharmony_ci 857bf215546Sopenharmony_ci if (blend.cb_target_mask) 858bf215546Sopenharmony_ci cb_color_control |= S_028808_MODE(V_028808_CB_NORMAL); 859bf215546Sopenharmony_ci else 860bf215546Sopenharmony_ci cb_color_control |= S_028808_MODE(V_028808_CB_DISABLE); 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci radv_pipeline_compute_spi_color_formats(pipeline, pCreateInfo, &blend, info); 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci pipeline->cb_color_control = cb_color_control; 865bf215546Sopenharmony_ci 866bf215546Sopenharmony_ci return blend; 867bf215546Sopenharmony_ci} 868bf215546Sopenharmony_ci 869bf215546Sopenharmony_cistatic uint32_t 870bf215546Sopenharmony_cisi_translate_fill(VkPolygonMode func) 871bf215546Sopenharmony_ci{ 
872bf215546Sopenharmony_ci switch (func) { 873bf215546Sopenharmony_ci case VK_POLYGON_MODE_FILL: 874bf215546Sopenharmony_ci return V_028814_X_DRAW_TRIANGLES; 875bf215546Sopenharmony_ci case VK_POLYGON_MODE_LINE: 876bf215546Sopenharmony_ci return V_028814_X_DRAW_LINES; 877bf215546Sopenharmony_ci case VK_POLYGON_MODE_POINT: 878bf215546Sopenharmony_ci return V_028814_X_DRAW_POINTS; 879bf215546Sopenharmony_ci default: 880bf215546Sopenharmony_ci assert(0); 881bf215546Sopenharmony_ci return V_028814_X_DRAW_POINTS; 882bf215546Sopenharmony_ci } 883bf215546Sopenharmony_ci} 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_cistatic unsigned 886bf215546Sopenharmony_ciradv_pipeline_color_samples( const struct radv_graphics_pipeline_info *info) 887bf215546Sopenharmony_ci{ 888bf215546Sopenharmony_ci if (info->color_att_samples && radv_pipeline_has_color_attachments(&info->ri)) { 889bf215546Sopenharmony_ci return info->color_att_samples; 890bf215546Sopenharmony_ci } 891bf215546Sopenharmony_ci 892bf215546Sopenharmony_ci return info->ms.raster_samples; 893bf215546Sopenharmony_ci} 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_cistatic unsigned 896bf215546Sopenharmony_ciradv_pipeline_depth_samples(const struct radv_graphics_pipeline_info *info) 897bf215546Sopenharmony_ci{ 898bf215546Sopenharmony_ci if (info->ds_att_samples && radv_pipeline_has_ds_attachments(&info->ri)) { 899bf215546Sopenharmony_ci return info->ds_att_samples; 900bf215546Sopenharmony_ci } 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_ci return info->ms.raster_samples; 903bf215546Sopenharmony_ci} 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_cistatic uint8_t 906bf215546Sopenharmony_ciradv_pipeline_get_ps_iter_samples(const struct radv_graphics_pipeline_info *info) 907bf215546Sopenharmony_ci{ 908bf215546Sopenharmony_ci uint32_t ps_iter_samples = 1; 909bf215546Sopenharmony_ci uint32_t num_samples = radv_pipeline_color_samples(info); 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci if 
(info->ms.sample_shading_enable) { 912bf215546Sopenharmony_ci ps_iter_samples = ceilf(info->ms.min_sample_shading * num_samples); 913bf215546Sopenharmony_ci ps_iter_samples = util_next_power_of_two(ps_iter_samples); 914bf215546Sopenharmony_ci } 915bf215546Sopenharmony_ci return ps_iter_samples; 916bf215546Sopenharmony_ci} 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_cistatic bool 919bf215546Sopenharmony_ciradv_is_depth_write_enabled(const struct radv_depth_stencil_info *ds_info) 920bf215546Sopenharmony_ci{ 921bf215546Sopenharmony_ci return ds_info->depth_test_enable && ds_info->depth_write_enable && 922bf215546Sopenharmony_ci ds_info->depth_compare_op != VK_COMPARE_OP_NEVER; 923bf215546Sopenharmony_ci} 924bf215546Sopenharmony_ci 925bf215546Sopenharmony_cistatic bool 926bf215546Sopenharmony_ciradv_writes_stencil(const struct radv_stencil_op_info *info) 927bf215546Sopenharmony_ci{ 928bf215546Sopenharmony_ci return info->write_mask && 929bf215546Sopenharmony_ci (info->fail_op != VK_STENCIL_OP_KEEP || info->pass_op != VK_STENCIL_OP_KEEP || 930bf215546Sopenharmony_ci info->depth_fail_op != VK_STENCIL_OP_KEEP); 931bf215546Sopenharmony_ci} 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_cistatic bool 934bf215546Sopenharmony_ciradv_is_stencil_write_enabled(const struct radv_depth_stencil_info *ds_info) 935bf215546Sopenharmony_ci{ 936bf215546Sopenharmony_ci return ds_info->stencil_test_enable && 937bf215546Sopenharmony_ci (radv_writes_stencil(&ds_info->front) || radv_writes_stencil(&ds_info->back)); 938bf215546Sopenharmony_ci} 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_cistatic bool 941bf215546Sopenharmony_ciradv_order_invariant_stencil_op(VkStencilOp op) 942bf215546Sopenharmony_ci{ 943bf215546Sopenharmony_ci /* REPLACE is normally order invariant, except when the stencil 944bf215546Sopenharmony_ci * reference value is written by the fragment shader. Tracking this 945bf215546Sopenharmony_ci * interaction does not seem worth the effort, so be conservative. 
946bf215546Sopenharmony_ci */ 947bf215546Sopenharmony_ci return op != VK_STENCIL_OP_INCREMENT_AND_CLAMP && op != VK_STENCIL_OP_DECREMENT_AND_CLAMP && 948bf215546Sopenharmony_ci op != VK_STENCIL_OP_REPLACE; 949bf215546Sopenharmony_ci} 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_cistatic bool 952bf215546Sopenharmony_ciradv_order_invariant_stencil_state(const struct radv_stencil_op_info *info) 953bf215546Sopenharmony_ci{ 954bf215546Sopenharmony_ci /* Compute whether, assuming Z writes are disabled, this stencil state 955bf215546Sopenharmony_ci * is order invariant in the sense that the set of passing fragments as 956bf215546Sopenharmony_ci * well as the final stencil buffer result does not depend on the order 957bf215546Sopenharmony_ci * of fragments. 958bf215546Sopenharmony_ci */ 959bf215546Sopenharmony_ci return !info->write_mask || 960bf215546Sopenharmony_ci /* The following assumes that Z writes are disabled. */ 961bf215546Sopenharmony_ci (info->compare_op == VK_COMPARE_OP_ALWAYS && 962bf215546Sopenharmony_ci radv_order_invariant_stencil_op(info->pass_op) && 963bf215546Sopenharmony_ci radv_order_invariant_stencil_op(info->depth_fail_op)) || 964bf215546Sopenharmony_ci (info->compare_op == VK_COMPARE_OP_NEVER && 965bf215546Sopenharmony_ci radv_order_invariant_stencil_op(info->fail_op)); 966bf215546Sopenharmony_ci} 967bf215546Sopenharmony_ci 968bf215546Sopenharmony_cistatic bool 969bf215546Sopenharmony_ciradv_pipeline_has_dynamic_ds_states(const struct radv_graphics_pipeline *pipeline) 970bf215546Sopenharmony_ci{ 971bf215546Sopenharmony_ci return !!(pipeline->dynamic_states & (RADV_DYNAMIC_DEPTH_TEST_ENABLE | 972bf215546Sopenharmony_ci RADV_DYNAMIC_DEPTH_WRITE_ENABLE | 973bf215546Sopenharmony_ci RADV_DYNAMIC_DEPTH_COMPARE_OP | 974bf215546Sopenharmony_ci RADV_DYNAMIC_STENCIL_TEST_ENABLE | 975bf215546Sopenharmony_ci RADV_DYNAMIC_STENCIL_WRITE_MASK | 976bf215546Sopenharmony_ci RADV_DYNAMIC_STENCIL_OP)); 977bf215546Sopenharmony_ci} 978bf215546Sopenharmony_ci 
979bf215546Sopenharmony_cistatic bool 980bf215546Sopenharmony_ciradv_pipeline_out_of_order_rast(struct radv_graphics_pipeline *pipeline, 981bf215546Sopenharmony_ci const struct radv_blend_state *blend, 982bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 983bf215546Sopenharmony_ci{ 984bf215546Sopenharmony_ci unsigned colormask = blend->cb_target_enabled_4bit; 985bf215546Sopenharmony_ci 986bf215546Sopenharmony_ci if (!pipeline->base.device->physical_device->out_of_order_rast_allowed) 987bf215546Sopenharmony_ci return false; 988bf215546Sopenharmony_ci 989bf215546Sopenharmony_ci /* Be conservative if a logic operation is enabled with color buffers. */ 990bf215546Sopenharmony_ci if (colormask && info->cb.logic_op_enable) 991bf215546Sopenharmony_ci return false; 992bf215546Sopenharmony_ci 993bf215546Sopenharmony_ci /* Be conservative if an extended dynamic depth/stencil state is 994bf215546Sopenharmony_ci * enabled because the driver can't update out-of-order rasterization 995bf215546Sopenharmony_ci * dynamically. 996bf215546Sopenharmony_ci */ 997bf215546Sopenharmony_ci if (radv_pipeline_has_dynamic_ds_states(pipeline)) 998bf215546Sopenharmony_ci return false; 999bf215546Sopenharmony_ci 1000bf215546Sopenharmony_ci /* Default depth/stencil invariance when no attachment is bound. */ 1001bf215546Sopenharmony_ci struct radv_dsa_order_invariance dsa_order_invariant = {.zs = true, .pass_set = true}; 1002bf215546Sopenharmony_ci 1003bf215546Sopenharmony_ci bool has_stencil = info->ri.stencil_att_format != VK_FORMAT_UNDEFINED; 1004bf215546Sopenharmony_ci struct radv_dsa_order_invariance order_invariance[2]; 1005bf215546Sopenharmony_ci struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; 1006bf215546Sopenharmony_ci 1007bf215546Sopenharmony_ci /* Compute depth/stencil order invariance in order to know if 1008bf215546Sopenharmony_ci * it's safe to enable out-of-order. 
1009bf215546Sopenharmony_ci */ 1010bf215546Sopenharmony_ci bool zfunc_is_ordered = info->ds.depth_compare_op == VK_COMPARE_OP_NEVER || 1011bf215546Sopenharmony_ci info->ds.depth_compare_op == VK_COMPARE_OP_LESS || 1012bf215546Sopenharmony_ci info->ds.depth_compare_op == VK_COMPARE_OP_LESS_OR_EQUAL || 1013bf215546Sopenharmony_ci info->ds.depth_compare_op == VK_COMPARE_OP_GREATER || 1014bf215546Sopenharmony_ci info->ds.depth_compare_op == VK_COMPARE_OP_GREATER_OR_EQUAL; 1015bf215546Sopenharmony_ci bool depth_write_enabled = radv_is_depth_write_enabled(&info->ds); 1016bf215546Sopenharmony_ci bool stencil_write_enabled = radv_is_stencil_write_enabled(&info->ds); 1017bf215546Sopenharmony_ci bool ds_write_enabled = depth_write_enabled || stencil_write_enabled; 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci bool nozwrite_and_order_invariant_stencil = 1020bf215546Sopenharmony_ci !ds_write_enabled || 1021bf215546Sopenharmony_ci (!depth_write_enabled && radv_order_invariant_stencil_state(&info->ds.front) && 1022bf215546Sopenharmony_ci radv_order_invariant_stencil_state(&info->ds.back)); 1023bf215546Sopenharmony_ci 1024bf215546Sopenharmony_ci order_invariance[1].zs = nozwrite_and_order_invariant_stencil || 1025bf215546Sopenharmony_ci (!stencil_write_enabled && zfunc_is_ordered); 1026bf215546Sopenharmony_ci order_invariance[0].zs = !depth_write_enabled || zfunc_is_ordered; 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci order_invariance[1].pass_set = 1029bf215546Sopenharmony_ci nozwrite_and_order_invariant_stencil || 1030bf215546Sopenharmony_ci (!stencil_write_enabled && 1031bf215546Sopenharmony_ci (info->ds.depth_compare_op == VK_COMPARE_OP_ALWAYS || 1032bf215546Sopenharmony_ci info->ds.depth_compare_op == VK_COMPARE_OP_NEVER)); 1033bf215546Sopenharmony_ci order_invariance[0].pass_set = 1034bf215546Sopenharmony_ci !depth_write_enabled || 1035bf215546Sopenharmony_ci (info->ds.depth_compare_op == VK_COMPARE_OP_ALWAYS || 1036bf215546Sopenharmony_ci 
info->ds.depth_compare_op == VK_COMPARE_OP_NEVER); 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_ci dsa_order_invariant = order_invariance[has_stencil]; 1039bf215546Sopenharmony_ci if (!dsa_order_invariant.zs) 1040bf215546Sopenharmony_ci return false; 1041bf215546Sopenharmony_ci 1042bf215546Sopenharmony_ci /* The set of PS invocations is always order invariant, 1043bf215546Sopenharmony_ci * except when early Z/S tests are requested. 1044bf215546Sopenharmony_ci */ 1045bf215546Sopenharmony_ci if (ps && ps->info.ps.writes_memory && ps->info.ps.early_fragment_test && 1046bf215546Sopenharmony_ci !dsa_order_invariant.pass_set) 1047bf215546Sopenharmony_ci return false; 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_ci /* Determine if out-of-order rasterization should be disabled when occlusion queries are used. */ 1050bf215546Sopenharmony_ci pipeline->disable_out_of_order_rast_for_occlusion = !dsa_order_invariant.pass_set; 1051bf215546Sopenharmony_ci 1052bf215546Sopenharmony_ci /* No color buffers are enabled for writing. */ 1053bf215546Sopenharmony_ci if (!colormask) 1054bf215546Sopenharmony_ci return true; 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_ci unsigned blendmask = colormask & blend->blend_enable_4bit; 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci if (blendmask) { 1059bf215546Sopenharmony_ci /* Only commutative blending. 
*/ 1060bf215546Sopenharmony_ci if (blendmask & ~blend->commutative_4bit) 1061bf215546Sopenharmony_ci return false; 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci if (!dsa_order_invariant.pass_set) 1064bf215546Sopenharmony_ci return false; 1065bf215546Sopenharmony_ci } 1066bf215546Sopenharmony_ci 1067bf215546Sopenharmony_ci if (colormask & ~blendmask) 1068bf215546Sopenharmony_ci return false; 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci return true; 1071bf215546Sopenharmony_ci} 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_cistatic void 1074bf215546Sopenharmony_ciradv_pipeline_init_multisample_state(struct radv_graphics_pipeline *pipeline, 1075bf215546Sopenharmony_ci const struct radv_blend_state *blend, 1076bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info, 1077bf215546Sopenharmony_ci unsigned rast_prim) 1078bf215546Sopenharmony_ci{ 1079bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 1080bf215546Sopenharmony_ci struct radv_multisample_state *ms = &pipeline->ms; 1081bf215546Sopenharmony_ci unsigned num_tile_pipes = pdevice->rad_info.num_tile_pipes; 1082bf215546Sopenharmony_ci const VkConservativeRasterizationModeEXT mode = info->rs.conservative_mode; 1083bf215546Sopenharmony_ci bool out_of_order_rast = false; 1084bf215546Sopenharmony_ci int ps_iter_samples = 1; 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci ms->num_samples = info->ms.raster_samples; 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci /* From the Vulkan 1.1.129 spec, 26.7. Sample Shading: 1089bf215546Sopenharmony_ci * 1090bf215546Sopenharmony_ci * "Sample shading is enabled for a graphics pipeline: 1091bf215546Sopenharmony_ci * 1092bf215546Sopenharmony_ci * - If the interface of the fragment shader entry point of the 1093bf215546Sopenharmony_ci * graphics pipeline includes an input variable decorated 1094bf215546Sopenharmony_ci * with SampleId or SamplePosition. 
In this case 1095bf215546Sopenharmony_ci * minSampleShadingFactor takes the value 1.0. 1096bf215546Sopenharmony_ci * - Else if the sampleShadingEnable member of the 1097bf215546Sopenharmony_ci * VkPipelineMultisampleStateCreateInfo structure specified 1098bf215546Sopenharmony_ci * when creating the graphics pipeline is set to VK_TRUE. In 1099bf215546Sopenharmony_ci * this case minSampleShadingFactor takes the value of 1100bf215546Sopenharmony_ci * VkPipelineMultisampleStateCreateInfo::minSampleShading. 1101bf215546Sopenharmony_ci * 1102bf215546Sopenharmony_ci * Otherwise, sample shading is considered disabled." 1103bf215546Sopenharmony_ci */ 1104bf215546Sopenharmony_ci if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.uses_sample_shading) { 1105bf215546Sopenharmony_ci ps_iter_samples = ms->num_samples; 1106bf215546Sopenharmony_ci } else { 1107bf215546Sopenharmony_ci ps_iter_samples = radv_pipeline_get_ps_iter_samples(info); 1108bf215546Sopenharmony_ci } 1109bf215546Sopenharmony_ci 1110bf215546Sopenharmony_ci if (info->rs.order == VK_RASTERIZATION_ORDER_RELAXED_AMD) { 1111bf215546Sopenharmony_ci /* Out-of-order rasterization is explicitly enabled by the 1112bf215546Sopenharmony_ci * application. 1113bf215546Sopenharmony_ci */ 1114bf215546Sopenharmony_ci out_of_order_rast = true; 1115bf215546Sopenharmony_ci } else { 1116bf215546Sopenharmony_ci /* Determine if the driver can enable out-of-order 1117bf215546Sopenharmony_ci * rasterization internally. 
1118bf215546Sopenharmony_ci */ 1119bf215546Sopenharmony_ci out_of_order_rast = radv_pipeline_out_of_order_rast(pipeline, blend, info); 1120bf215546Sopenharmony_ci } 1121bf215546Sopenharmony_ci 1122bf215546Sopenharmony_ci ms->pa_sc_aa_config = 0; 1123bf215546Sopenharmony_ci ms->db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) | 1124bf215546Sopenharmony_ci S_028804_INTERPOLATE_COMP_Z(1) | S_028804_STATIC_ANCHOR_ASSOCIATIONS(1); 1125bf215546Sopenharmony_ci 1126bf215546Sopenharmony_ci /* Adjust MSAA state if conservative rasterization is enabled. */ 1127bf215546Sopenharmony_ci if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { 1128bf215546Sopenharmony_ci ms->pa_sc_aa_config |= S_028BE0_AA_MASK_CENTROID_DTMN(1); 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci ms->db_eqaa |= 1131bf215546Sopenharmony_ci S_028804_ENABLE_POSTZ_OVERRASTERIZATION(1) | S_028804_OVERRASTERIZATION_AMOUNT(4); 1132bf215546Sopenharmony_ci } 1133bf215546Sopenharmony_ci 1134bf215546Sopenharmony_ci ms->pa_sc_mode_cntl_1 = 1135bf215546Sopenharmony_ci S_028A4C_WALK_FENCE_ENABLE(1) | // TODO linear dst fixes 1136bf215546Sopenharmony_ci S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 
2 : 3) | 1137bf215546Sopenharmony_ci S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) | 1138bf215546Sopenharmony_ci S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) | 1139bf215546Sopenharmony_ci /* always 1: */ 1140bf215546Sopenharmony_ci S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 1141bf215546Sopenharmony_ci S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 1142bf215546Sopenharmony_ci S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1); 1143bf215546Sopenharmony_ci ms->pa_sc_mode_cntl_0 = S_028A48_ALTERNATE_RBS_PER_TILE(pdevice->rad_info.gfx_level >= GFX9) | 1144bf215546Sopenharmony_ci S_028A48_VPORT_SCISSOR_ENABLE(1) | 1145bf215546Sopenharmony_ci S_028A48_LINE_STIPPLE_ENABLE(info->rs.stippled_line_enable); 1146bf215546Sopenharmony_ci 1147bf215546Sopenharmony_ci if (info->rs.line_raster_mode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT && 1148bf215546Sopenharmony_ci radv_rast_prim_is_line(rast_prim)) { 1149bf215546Sopenharmony_ci /* From the Vulkan spec 1.3.221: 1150bf215546Sopenharmony_ci * 1151bf215546Sopenharmony_ci * "When Bresenham lines are being rasterized, sample locations may all be treated as being at 1152bf215546Sopenharmony_ci * the pixel center (this may affect attribute and depth interpolation)." 1153bf215546Sopenharmony_ci * 1154bf215546Sopenharmony_ci * "One consequence of this is that Bresenham lines cover the same pixels regardless of the 1155bf215546Sopenharmony_ci * number of rasterization samples, and cover all samples in those pixels (unless masked out 1156bf215546Sopenharmony_ci * or killed)." 
1157bf215546Sopenharmony_ci */ 1158bf215546Sopenharmony_ci ms->num_samples = 1; 1159bf215546Sopenharmony_ci } 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_ci if (ms->num_samples > 1) { 1162bf215546Sopenharmony_ci uint32_t z_samples = radv_pipeline_depth_samples(info); 1163bf215546Sopenharmony_ci unsigned log_samples = util_logbase2(ms->num_samples); 1164bf215546Sopenharmony_ci unsigned log_z_samples = util_logbase2(z_samples); 1165bf215546Sopenharmony_ci unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples); 1166bf215546Sopenharmony_ci ms->pa_sc_mode_cntl_0 |= S_028A48_MSAA_ENABLE(1); 1167bf215546Sopenharmony_ci ms->db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | 1168bf215546Sopenharmony_ci S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 1169bf215546Sopenharmony_ci S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 1170bf215546Sopenharmony_ci S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); 1171bf215546Sopenharmony_ci ms->pa_sc_aa_config |= 1172bf215546Sopenharmony_ci S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 1173bf215546Sopenharmony_ci S_028BE0_MAX_SAMPLE_DIST(radv_get_default_max_sample_dist(log_samples)) | 1174bf215546Sopenharmony_ci S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | /* CM_R_028BE0_PA_SC_AA_CONFIG */ 1175bf215546Sopenharmony_ci S_028BE0_COVERED_CENTROID_IS_CENTER(pdevice->rad_info.gfx_level >= GFX10_3); 1176bf215546Sopenharmony_ci ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); 1177bf215546Sopenharmony_ci if (ps_iter_samples > 1) 1178bf215546Sopenharmony_ci pipeline->spi_baryc_cntl |= S_0286E0_POS_FLOAT_LOCATION(2); 1179bf215546Sopenharmony_ci } 1180bf215546Sopenharmony_ci 1181bf215546Sopenharmony_ci ms->pa_sc_aa_mask[0] = info->ms.sample_mask | ((uint32_t)info->ms.sample_mask << 16); 1182bf215546Sopenharmony_ci ms->pa_sc_aa_mask[1] = info->ms.sample_mask | ((uint32_t)info->ms.sample_mask << 16); 1183bf215546Sopenharmony_ci} 1184bf215546Sopenharmony_ci 1185bf215546Sopenharmony_cistatic void 
1186bf215546Sopenharmony_cigfx103_pipeline_init_vrs_state(struct radv_graphics_pipeline *pipeline, 1187bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 1188bf215546Sopenharmony_ci{ 1189bf215546Sopenharmony_ci struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; 1190bf215546Sopenharmony_ci struct radv_multisample_state *ms = &pipeline->ms; 1191bf215546Sopenharmony_ci struct radv_vrs_state *vrs = &pipeline->vrs; 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_ci if (info->ms.sample_shading_enable || 1194bf215546Sopenharmony_ci ps->info.ps.uses_sample_shading || ps->info.ps.reads_sample_mask_in) { 1195bf215546Sopenharmony_ci /* Disable VRS and use the rates from PS_ITER_SAMPLES if: 1196bf215546Sopenharmony_ci * 1197bf215546Sopenharmony_ci * 1) sample shading is enabled or per-sample interpolation is 1198bf215546Sopenharmony_ci * used by the fragment shader 1199bf215546Sopenharmony_ci * 2) the fragment shader reads gl_SampleMaskIn because the 1200bf215546Sopenharmony_ci * 16-bit sample coverage mask isn't enough for MSAA8x and 1201bf215546Sopenharmony_ci * 2x2 coarse shading isn't enough. 1202bf215546Sopenharmony_ci */ 1203bf215546Sopenharmony_ci vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE); 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci /* Make sure sample shading is enabled even if only MSAA1x is 1206bf215546Sopenharmony_ci * used because the SAMPLE_ITER combiner is in passthrough 1207bf215546Sopenharmony_ci * mode if PS_ITER_SAMPLE is 0, and it uses the per-draw rate. 1208bf215546Sopenharmony_ci * The default VRS rate when sample shading is enabled is 1x1. 
1209bf215546Sopenharmony_ci */ 1210bf215546Sopenharmony_ci if (!G_028A4C_PS_ITER_SAMPLE(ms->pa_sc_mode_cntl_1)) 1211bf215546Sopenharmony_ci ms->pa_sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(1); 1212bf215546Sopenharmony_ci } else { 1213bf215546Sopenharmony_ci vrs->pa_cl_vrs_cntl = S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU); 1214bf215546Sopenharmony_ci } 1215bf215546Sopenharmony_ci} 1216bf215546Sopenharmony_ci 1217bf215546Sopenharmony_cistatic uint32_t 1218bf215546Sopenharmony_cisi_conv_tess_prim_to_gs_out(enum tess_primitive_mode prim) 1219bf215546Sopenharmony_ci{ 1220bf215546Sopenharmony_ci switch (prim) { 1221bf215546Sopenharmony_ci case TESS_PRIMITIVE_TRIANGLES: 1222bf215546Sopenharmony_ci case TESS_PRIMITIVE_QUADS: 1223bf215546Sopenharmony_ci return V_028A6C_TRISTRIP; 1224bf215546Sopenharmony_ci case TESS_PRIMITIVE_ISOLINES: 1225bf215546Sopenharmony_ci return V_028A6C_LINESTRIP; 1226bf215546Sopenharmony_ci default: 1227bf215546Sopenharmony_ci assert(0); 1228bf215546Sopenharmony_ci return 0; 1229bf215546Sopenharmony_ci } 1230bf215546Sopenharmony_ci} 1231bf215546Sopenharmony_ci 1232bf215546Sopenharmony_cistatic uint32_t 1233bf215546Sopenharmony_cisi_conv_gl_prim_to_gs_out(unsigned gl_prim) 1234bf215546Sopenharmony_ci{ 1235bf215546Sopenharmony_ci switch (gl_prim) { 1236bf215546Sopenharmony_ci case SHADER_PRIM_POINTS: 1237bf215546Sopenharmony_ci return V_028A6C_POINTLIST; 1238bf215546Sopenharmony_ci case SHADER_PRIM_LINES: 1239bf215546Sopenharmony_ci case SHADER_PRIM_LINE_STRIP: 1240bf215546Sopenharmony_ci case SHADER_PRIM_LINES_ADJACENCY: 1241bf215546Sopenharmony_ci return V_028A6C_LINESTRIP; 1242bf215546Sopenharmony_ci 1243bf215546Sopenharmony_ci case SHADER_PRIM_TRIANGLES: 1244bf215546Sopenharmony_ci case SHADER_PRIM_TRIANGLE_STRIP_ADJACENCY: 1245bf215546Sopenharmony_ci case SHADER_PRIM_TRIANGLE_STRIP: 1246bf215546Sopenharmony_ci case SHADER_PRIM_QUADS: 1247bf215546Sopenharmony_ci return V_028A6C_TRISTRIP; 1248bf215546Sopenharmony_ci 
default: 1249bf215546Sopenharmony_ci assert(0); 1250bf215546Sopenharmony_ci return 0; 1251bf215546Sopenharmony_ci } 1252bf215546Sopenharmony_ci} 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_cistatic uint64_t 1255bf215546Sopenharmony_ciradv_dynamic_state_mask(VkDynamicState state) 1256bf215546Sopenharmony_ci{ 1257bf215546Sopenharmony_ci switch (state) { 1258bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_VIEWPORT: 1259bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT: 1260bf215546Sopenharmony_ci return RADV_DYNAMIC_VIEWPORT; 1261bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_SCISSOR: 1262bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT: 1263bf215546Sopenharmony_ci return RADV_DYNAMIC_SCISSOR; 1264bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_LINE_WIDTH: 1265bf215546Sopenharmony_ci return RADV_DYNAMIC_LINE_WIDTH; 1266bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_BIAS: 1267bf215546Sopenharmony_ci return RADV_DYNAMIC_DEPTH_BIAS; 1268bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_BLEND_CONSTANTS: 1269bf215546Sopenharmony_ci return RADV_DYNAMIC_BLEND_CONSTANTS; 1270bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_BOUNDS: 1271bf215546Sopenharmony_ci return RADV_DYNAMIC_DEPTH_BOUNDS; 1272bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK: 1273bf215546Sopenharmony_ci return RADV_DYNAMIC_STENCIL_COMPARE_MASK; 1274bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK: 1275bf215546Sopenharmony_ci return RADV_DYNAMIC_STENCIL_WRITE_MASK; 1276bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_REFERENCE: 1277bf215546Sopenharmony_ci return RADV_DYNAMIC_STENCIL_REFERENCE; 1278bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DISCARD_RECTANGLE_EXT: 1279bf215546Sopenharmony_ci return RADV_DYNAMIC_DISCARD_RECTANGLE; 1280bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: 1281bf215546Sopenharmony_ci return RADV_DYNAMIC_SAMPLE_LOCATIONS; 1282bf215546Sopenharmony_ci case 
VK_DYNAMIC_STATE_LINE_STIPPLE_EXT: 1283bf215546Sopenharmony_ci return RADV_DYNAMIC_LINE_STIPPLE; 1284bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_CULL_MODE: 1285bf215546Sopenharmony_ci return RADV_DYNAMIC_CULL_MODE; 1286bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_FRONT_FACE: 1287bf215546Sopenharmony_ci return RADV_DYNAMIC_FRONT_FACE; 1288bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY: 1289bf215546Sopenharmony_ci return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY; 1290bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: 1291bf215546Sopenharmony_ci return RADV_DYNAMIC_DEPTH_TEST_ENABLE; 1292bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: 1293bf215546Sopenharmony_ci return RADV_DYNAMIC_DEPTH_WRITE_ENABLE; 1294bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP: 1295bf215546Sopenharmony_ci return RADV_DYNAMIC_DEPTH_COMPARE_OP; 1296bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE: 1297bf215546Sopenharmony_ci return RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE; 1298bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE: 1299bf215546Sopenharmony_ci return RADV_DYNAMIC_STENCIL_TEST_ENABLE; 1300bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_OP: 1301bf215546Sopenharmony_ci return RADV_DYNAMIC_STENCIL_OP; 1302bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE: 1303bf215546Sopenharmony_ci return RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE; 1304bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_FRAGMENT_SHADING_RATE_KHR: 1305bf215546Sopenharmony_ci return RADV_DYNAMIC_FRAGMENT_SHADING_RATE; 1306bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_PATCH_CONTROL_POINTS_EXT: 1307bf215546Sopenharmony_ci return RADV_DYNAMIC_PATCH_CONTROL_POINTS; 1308bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE: 1309bf215546Sopenharmony_ci return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE; 1310bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE: 1311bf215546Sopenharmony_ci 
return RADV_DYNAMIC_DEPTH_BIAS_ENABLE; 1312bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_LOGIC_OP_EXT: 1313bf215546Sopenharmony_ci return RADV_DYNAMIC_LOGIC_OP; 1314bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE: 1315bf215546Sopenharmony_ci return RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE; 1316bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: 1317bf215546Sopenharmony_ci return RADV_DYNAMIC_COLOR_WRITE_ENABLE; 1318bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_VERTEX_INPUT_EXT: 1319bf215546Sopenharmony_ci return RADV_DYNAMIC_VERTEX_INPUT; 1320bf215546Sopenharmony_ci default: 1321bf215546Sopenharmony_ci unreachable("Unhandled dynamic state"); 1322bf215546Sopenharmony_ci } 1323bf215546Sopenharmony_ci} 1324bf215546Sopenharmony_ci 1325bf215546Sopenharmony_cistatic bool 1326bf215546Sopenharmony_ciradv_pipeline_is_blend_enabled(const struct radv_graphics_pipeline *pipeline, 1327bf215546Sopenharmony_ci const struct radv_color_blend_info *cb_info) 1328bf215546Sopenharmony_ci{ 1329bf215546Sopenharmony_ci for (uint32_t i = 0; i < cb_info->att_count; i++) { 1330bf215546Sopenharmony_ci if (cb_info->att[i].color_write_mask && cb_info->att[i].blend_enable) 1331bf215546Sopenharmony_ci return true; 1332bf215546Sopenharmony_ci } 1333bf215546Sopenharmony_ci 1334bf215546Sopenharmony_ci return false; 1335bf215546Sopenharmony_ci} 1336bf215546Sopenharmony_ci 1337bf215546Sopenharmony_cistatic uint64_t 1338bf215546Sopenharmony_ciradv_pipeline_needed_dynamic_state(const struct radv_graphics_pipeline *pipeline, 1339bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 1340bf215546Sopenharmony_ci{ 1341bf215546Sopenharmony_ci bool has_color_att = radv_pipeline_has_color_attachments(&info->ri); 1342bf215546Sopenharmony_ci bool raster_enabled = !info->rs.discard_enable || 1343bf215546Sopenharmony_ci (pipeline->dynamic_states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE); 1344bf215546Sopenharmony_ci uint64_t states = RADV_DYNAMIC_ALL; 
1345bf215546Sopenharmony_ci 1346bf215546Sopenharmony_ci /* Disable dynamic states that are useless to mesh shading. */ 1347bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) { 1348bf215546Sopenharmony_ci if (!raster_enabled) 1349bf215546Sopenharmony_ci return RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE; 1350bf215546Sopenharmony_ci 1351bf215546Sopenharmony_ci states &= ~(RADV_DYNAMIC_VERTEX_INPUT | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | 1352bf215546Sopenharmony_ci RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_PRIMITIVE_TOPOLOGY); 1353bf215546Sopenharmony_ci } 1354bf215546Sopenharmony_ci 1355bf215546Sopenharmony_ci /* If rasterization is disabled we do not care about any of the 1356bf215546Sopenharmony_ci * dynamic states, since they are all rasterization related only, 1357bf215546Sopenharmony_ci * except primitive topology, primitive restart enable, vertex 1358bf215546Sopenharmony_ci * binding stride and rasterization discard itself. 1359bf215546Sopenharmony_ci */ 1360bf215546Sopenharmony_ci if (!raster_enabled) { 1361bf215546Sopenharmony_ci return RADV_DYNAMIC_PRIMITIVE_TOPOLOGY | RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE | 1362bf215546Sopenharmony_ci RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE | RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE | 1363bf215546Sopenharmony_ci RADV_DYNAMIC_VERTEX_INPUT; 1364bf215546Sopenharmony_ci } 1365bf215546Sopenharmony_ci 1366bf215546Sopenharmony_ci if (!info->rs.depth_bias_enable && 1367bf215546Sopenharmony_ci !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE)) 1368bf215546Sopenharmony_ci states &= ~RADV_DYNAMIC_DEPTH_BIAS; 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci if (!info->ds.depth_bounds_test_enable && 1371bf215546Sopenharmony_ci !(pipeline->dynamic_states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE)) 1372bf215546Sopenharmony_ci states &= ~RADV_DYNAMIC_DEPTH_BOUNDS; 1373bf215546Sopenharmony_ci 1374bf215546Sopenharmony_ci if (!info->ds.stencil_test_enable && 1375bf215546Sopenharmony_ci 
!(pipeline->dynamic_states & RADV_DYNAMIC_STENCIL_TEST_ENABLE)) 1376bf215546Sopenharmony_ci states &= ~(RADV_DYNAMIC_STENCIL_COMPARE_MASK | RADV_DYNAMIC_STENCIL_WRITE_MASK | 1377bf215546Sopenharmony_ci RADV_DYNAMIC_STENCIL_REFERENCE | RADV_DYNAMIC_STENCIL_OP); 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_ci if (!info->dr.count) 1380bf215546Sopenharmony_ci states &= ~RADV_DYNAMIC_DISCARD_RECTANGLE; 1381bf215546Sopenharmony_ci 1382bf215546Sopenharmony_ci if (!info->ms.sample_locs_enable) 1383bf215546Sopenharmony_ci states &= ~RADV_DYNAMIC_SAMPLE_LOCATIONS; 1384bf215546Sopenharmony_ci 1385bf215546Sopenharmony_ci if (!info->rs.stippled_line_enable) 1386bf215546Sopenharmony_ci states &= ~RADV_DYNAMIC_LINE_STIPPLE; 1387bf215546Sopenharmony_ci 1388bf215546Sopenharmony_ci if (!radv_is_vrs_enabled(pipeline, info)) 1389bf215546Sopenharmony_ci states &= ~RADV_DYNAMIC_FRAGMENT_SHADING_RATE; 1390bf215546Sopenharmony_ci 1391bf215546Sopenharmony_ci if (!has_color_att || !radv_pipeline_is_blend_enabled(pipeline, &info->cb)) 1392bf215546Sopenharmony_ci states &= ~RADV_DYNAMIC_BLEND_CONSTANTS; 1393bf215546Sopenharmony_ci 1394bf215546Sopenharmony_ci if (!has_color_att) 1395bf215546Sopenharmony_ci states &= ~RADV_DYNAMIC_COLOR_WRITE_ENABLE; 1396bf215546Sopenharmony_ci 1397bf215546Sopenharmony_ci return states; 1398bf215546Sopenharmony_ci} 1399bf215546Sopenharmony_ci 1400bf215546Sopenharmony_cistatic struct radv_ia_multi_vgt_param_helpers 1401bf215546Sopenharmony_ciradv_compute_ia_multi_vgt_param_helpers(struct radv_graphics_pipeline *pipeline) 1402bf215546Sopenharmony_ci{ 1403bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 1404bf215546Sopenharmony_ci struct radv_ia_multi_vgt_param_helpers ia_multi_vgt_param = {0}; 1405bf215546Sopenharmony_ci 1406bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) 1407bf215546Sopenharmony_ci ia_multi_vgt_param.primgroup_size = 
1408bf215546Sopenharmony_ci pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches; 1409bf215546Sopenharmony_ci else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) 1410bf215546Sopenharmony_ci ia_multi_vgt_param.primgroup_size = 64; 1411bf215546Sopenharmony_ci else 1412bf215546Sopenharmony_ci ia_multi_vgt_param.primgroup_size = 128; /* recommended without a GS */ 1413bf215546Sopenharmony_ci 1414bf215546Sopenharmony_ci /* GS requirement. */ 1415bf215546Sopenharmony_ci ia_multi_vgt_param.partial_es_wave = false; 1416bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && pdevice->rad_info.gfx_level <= GFX8) 1417bf215546Sopenharmony_ci if (SI_GS_PER_ES / ia_multi_vgt_param.primgroup_size >= pdevice->gs_table_depth - 3) 1418bf215546Sopenharmony_ci ia_multi_vgt_param.partial_es_wave = true; 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci ia_multi_vgt_param.ia_switch_on_eoi = false; 1421bf215546Sopenharmony_ci if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.prim_id_input) 1422bf215546Sopenharmony_ci ia_multi_vgt_param.ia_switch_on_eoi = true; 1423bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.uses_prim_id) 1424bf215546Sopenharmony_ci ia_multi_vgt_param.ia_switch_on_eoi = true; 1425bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { 1426bf215546Sopenharmony_ci /* SWITCH_ON_EOI must be set if PrimID is used. 
*/ 1427bf215546Sopenharmony_ci if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id || 1428bf215546Sopenharmony_ci radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL)->info.uses_prim_id) 1429bf215546Sopenharmony_ci ia_multi_vgt_param.ia_switch_on_eoi = true; 1430bf215546Sopenharmony_ci } 1431bf215546Sopenharmony_ci 1432bf215546Sopenharmony_ci ia_multi_vgt_param.partial_vs_wave = false; 1433bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { 1434bf215546Sopenharmony_ci /* Bug with tessellation and GS on Bonaire and older 2 SE chips. */ 1435bf215546Sopenharmony_ci if ((pdevice->rad_info.family == CHIP_TAHITI || 1436bf215546Sopenharmony_ci pdevice->rad_info.family == CHIP_PITCAIRN || 1437bf215546Sopenharmony_ci pdevice->rad_info.family == CHIP_BONAIRE) && 1438bf215546Sopenharmony_ci radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) 1439bf215546Sopenharmony_ci ia_multi_vgt_param.partial_vs_wave = true; 1440bf215546Sopenharmony_ci /* Needed for 028B6C_DISTRIBUTION_MODE != 0 */ 1441bf215546Sopenharmony_ci if (pdevice->rad_info.has_distributed_tess) { 1442bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { 1443bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level <= GFX8) 1444bf215546Sopenharmony_ci ia_multi_vgt_param.partial_es_wave = true; 1445bf215546Sopenharmony_ci } else { 1446bf215546Sopenharmony_ci ia_multi_vgt_param.partial_vs_wave = true; 1447bf215546Sopenharmony_ci } 1448bf215546Sopenharmony_ci } 1449bf215546Sopenharmony_ci } 1450bf215546Sopenharmony_ci 1451bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { 1452bf215546Sopenharmony_ci /* On these chips there is the possibility of a hang if the 1453bf215546Sopenharmony_ci * pipeline uses a GS and partial_vs_wave is not set. 
1454bf215546Sopenharmony_ci * 1455bf215546Sopenharmony_ci * This mostly does not hit 4-SE chips, as those typically set 1456bf215546Sopenharmony_ci * ia_switch_on_eoi and then partial_vs_wave is set for pipelines 1457bf215546Sopenharmony_ci * with GS due to another workaround. 1458bf215546Sopenharmony_ci * 1459bf215546Sopenharmony_ci * Reproducer: https://bugs.freedesktop.org/show_bug.cgi?id=109242 1460bf215546Sopenharmony_ci */ 1461bf215546Sopenharmony_ci if (pdevice->rad_info.family == CHIP_TONGA || 1462bf215546Sopenharmony_ci pdevice->rad_info.family == CHIP_FIJI || 1463bf215546Sopenharmony_ci pdevice->rad_info.family == CHIP_POLARIS10 || 1464bf215546Sopenharmony_ci pdevice->rad_info.family == CHIP_POLARIS11 || 1465bf215546Sopenharmony_ci pdevice->rad_info.family == CHIP_POLARIS12 || 1466bf215546Sopenharmony_ci pdevice->rad_info.family == CHIP_VEGAM) { 1467bf215546Sopenharmony_ci ia_multi_vgt_param.partial_vs_wave = true; 1468bf215546Sopenharmony_ci } 1469bf215546Sopenharmony_ci } 1470bf215546Sopenharmony_ci 1471bf215546Sopenharmony_ci ia_multi_vgt_param.base = 1472bf215546Sopenharmony_ci S_028AA8_PRIMGROUP_SIZE(ia_multi_vgt_param.primgroup_size - 1) | 1473bf215546Sopenharmony_ci /* The following field was moved to VGT_SHADER_STAGES_EN in GFX9. */ 1474bf215546Sopenharmony_ci S_028AA8_MAX_PRIMGRP_IN_WAVE(pdevice->rad_info.gfx_level == GFX8 ? 
2 : 0) | 1475bf215546Sopenharmony_ci S_030960_EN_INST_OPT_BASIC(pdevice->rad_info.gfx_level >= GFX9) | 1476bf215546Sopenharmony_ci S_030960_EN_INST_OPT_ADV(pdevice->rad_info.gfx_level >= GFX9); 1477bf215546Sopenharmony_ci 1478bf215546Sopenharmony_ci return ia_multi_vgt_param; 1479bf215546Sopenharmony_ci} 1480bf215546Sopenharmony_ci 1481bf215546Sopenharmony_cistatic uint32_t 1482bf215546Sopenharmony_ciradv_get_attrib_stride(const VkPipelineVertexInputStateCreateInfo *vi, uint32_t attrib_binding) 1483bf215546Sopenharmony_ci{ 1484bf215546Sopenharmony_ci for (uint32_t i = 0; i < vi->vertexBindingDescriptionCount; i++) { 1485bf215546Sopenharmony_ci const VkVertexInputBindingDescription *input_binding = &vi->pVertexBindingDescriptions[i]; 1486bf215546Sopenharmony_ci 1487bf215546Sopenharmony_ci if (input_binding->binding == attrib_binding) 1488bf215546Sopenharmony_ci return input_binding->stride; 1489bf215546Sopenharmony_ci } 1490bf215546Sopenharmony_ci 1491bf215546Sopenharmony_ci return 0; 1492bf215546Sopenharmony_ci} 1493bf215546Sopenharmony_ci 1494bf215546Sopenharmony_cistatic struct radv_vertex_input_info 1495bf215546Sopenharmony_ciradv_pipeline_init_vertex_input_info(struct radv_graphics_pipeline *pipeline, 1496bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1497bf215546Sopenharmony_ci{ 1498bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 1499bf215546Sopenharmony_ci const VkPipelineVertexInputStateCreateInfo *vi = pCreateInfo->pVertexInputState; 1500bf215546Sopenharmony_ci struct radv_vertex_input_info info = {0}; 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci if (!(pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT)) { 1503bf215546Sopenharmony_ci /* Vertex input */ 1504bf215546Sopenharmony_ci const VkPipelineVertexInputDivisorStateCreateInfoEXT *divisor_state = 1505bf215546Sopenharmony_ci vk_find_struct_const(vi->pNext, 
PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); 1506bf215546Sopenharmony_ci 1507bf215546Sopenharmony_ci uint32_t binding_input_rate = 0; 1508bf215546Sopenharmony_ci uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; 1509bf215546Sopenharmony_ci for (unsigned i = 0; i < vi->vertexBindingDescriptionCount; ++i) { 1510bf215546Sopenharmony_ci const VkVertexInputBindingDescription *desc = &vi->pVertexBindingDescriptions[i]; 1511bf215546Sopenharmony_ci 1512bf215546Sopenharmony_ci if (desc->inputRate) { 1513bf215546Sopenharmony_ci unsigned binding = vi->pVertexBindingDescriptions[i].binding; 1514bf215546Sopenharmony_ci binding_input_rate |= 1u << binding; 1515bf215546Sopenharmony_ci instance_rate_divisors[binding] = 1; 1516bf215546Sopenharmony_ci } 1517bf215546Sopenharmony_ci 1518bf215546Sopenharmony_ci info.binding_stride[desc->binding] = desc->stride; 1519bf215546Sopenharmony_ci } 1520bf215546Sopenharmony_ci 1521bf215546Sopenharmony_ci if (divisor_state) { 1522bf215546Sopenharmony_ci for (unsigned i = 0; i < divisor_state->vertexBindingDivisorCount; ++i) { 1523bf215546Sopenharmony_ci instance_rate_divisors[divisor_state->pVertexBindingDivisors[i].binding] = 1524bf215546Sopenharmony_ci divisor_state->pVertexBindingDivisors[i].divisor; 1525bf215546Sopenharmony_ci } 1526bf215546Sopenharmony_ci } 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_ci for (unsigned i = 0; i < vi->vertexAttributeDescriptionCount; ++i) { 1529bf215546Sopenharmony_ci const VkVertexInputAttributeDescription *desc = &vi->pVertexAttributeDescriptions[i]; 1530bf215546Sopenharmony_ci const struct util_format_description *format_desc; 1531bf215546Sopenharmony_ci unsigned location = desc->location; 1532bf215546Sopenharmony_ci unsigned binding = desc->binding; 1533bf215546Sopenharmony_ci unsigned num_format, data_format; 1534bf215546Sopenharmony_ci bool post_shuffle; 1535bf215546Sopenharmony_ci 1536bf215546Sopenharmony_ci if (binding_input_rate & (1u << binding)) { 1537bf215546Sopenharmony_ci 
info.instance_rate_inputs |= 1u << location; 1538bf215546Sopenharmony_ci info.instance_rate_divisors[location] = instance_rate_divisors[binding]; 1539bf215546Sopenharmony_ci } 1540bf215546Sopenharmony_ci 1541bf215546Sopenharmony_ci format_desc = vk_format_description(desc->format); 1542bf215546Sopenharmony_ci radv_translate_vertex_format(pdevice, desc->format, format_desc, &data_format, &num_format, 1543bf215546Sopenharmony_ci &post_shuffle, &info.vertex_alpha_adjust[location]); 1544bf215546Sopenharmony_ci 1545bf215546Sopenharmony_ci info.vertex_attribute_formats[location] = data_format | (num_format << 4); 1546bf215546Sopenharmony_ci info.vertex_attribute_bindings[location] = desc->binding; 1547bf215546Sopenharmony_ci info.vertex_attribute_offsets[location] = desc->offset; 1548bf215546Sopenharmony_ci 1549bf215546Sopenharmony_ci const struct ac_data_format_info *dfmt_info = ac_get_data_format_info(data_format); 1550bf215546Sopenharmony_ci unsigned attrib_align = 1551bf215546Sopenharmony_ci dfmt_info->chan_byte_size ? dfmt_info->chan_byte_size : dfmt_info->element_size; 1552bf215546Sopenharmony_ci 1553bf215546Sopenharmony_ci /* If desc->offset is misaligned, then the buffer offset must be too. Just 1554bf215546Sopenharmony_ci * skip updating vertex_binding_align in this case. 
1555bf215546Sopenharmony_ci */ 1556bf215546Sopenharmony_ci if (desc->offset % attrib_align == 0) 1557bf215546Sopenharmony_ci info.vertex_binding_align[desc->binding] = 1558bf215546Sopenharmony_ci MAX2(info.vertex_binding_align[desc->binding], attrib_align); 1559bf215546Sopenharmony_ci 1560bf215546Sopenharmony_ci if (!(pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE)) { 1561bf215546Sopenharmony_ci /* From the Vulkan spec 1.2.157: 1562bf215546Sopenharmony_ci * 1563bf215546Sopenharmony_ci * "If the bound pipeline state object was created 1564bf215546Sopenharmony_ci * with the 1565bf215546Sopenharmony_ci * VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE 1566bf215546Sopenharmony_ci * dynamic state enabled then pStrides[i] specifies 1567bf215546Sopenharmony_ci * the distance in bytes between two consecutive 1568bf215546Sopenharmony_ci * elements within the corresponding buffer. In this 1569bf215546Sopenharmony_ci * case the VkVertexInputBindingDescription::stride 1570bf215546Sopenharmony_ci * state from the pipeline state object is ignored." 1571bf215546Sopenharmony_ci * 1572bf215546Sopenharmony_ci * Make sure the vertex attribute stride is zero to 1573bf215546Sopenharmony_ci * avoid computing a wrong offset if it's initialized 1574bf215546Sopenharmony_ci * to something else than zero. 
1575bf215546Sopenharmony_ci */ 1576bf215546Sopenharmony_ci info.vertex_attribute_strides[location] = radv_get_attrib_stride(vi, desc->binding); 1577bf215546Sopenharmony_ci } 1578bf215546Sopenharmony_ci 1579bf215546Sopenharmony_ci if (post_shuffle) 1580bf215546Sopenharmony_ci info.vertex_post_shuffle |= 1 << location; 1581bf215546Sopenharmony_ci 1582bf215546Sopenharmony_ci uint32_t end = desc->offset + vk_format_get_blocksize(desc->format); 1583bf215546Sopenharmony_ci info.attrib_ends[desc->location] = end; 1584bf215546Sopenharmony_ci if (info.binding_stride[desc->binding]) 1585bf215546Sopenharmony_ci info.attrib_index_offset[desc->location] = 1586bf215546Sopenharmony_ci desc->offset / info.binding_stride[desc->binding]; 1587bf215546Sopenharmony_ci info.attrib_bindings[desc->location] = desc->binding; 1588bf215546Sopenharmony_ci } 1589bf215546Sopenharmony_ci } 1590bf215546Sopenharmony_ci 1591bf215546Sopenharmony_ci return info; 1592bf215546Sopenharmony_ci} 1593bf215546Sopenharmony_ci 1594bf215546Sopenharmony_cistatic struct radv_input_assembly_info 1595bf215546Sopenharmony_ciradv_pipeline_init_input_assembly_info(struct radv_graphics_pipeline *pipeline, 1596bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1597bf215546Sopenharmony_ci{ 1598bf215546Sopenharmony_ci const VkPipelineInputAssemblyStateCreateInfo *ia = pCreateInfo->pInputAssemblyState; 1599bf215546Sopenharmony_ci struct radv_input_assembly_info info = {0}; 1600bf215546Sopenharmony_ci 1601bf215546Sopenharmony_ci info.primitive_topology = si_translate_prim(ia->topology); 1602bf215546Sopenharmony_ci info.primitive_restart_enable = !!ia->primitiveRestartEnable; 1603bf215546Sopenharmony_ci 1604bf215546Sopenharmony_ci return info; 1605bf215546Sopenharmony_ci} 1606bf215546Sopenharmony_ci 1607bf215546Sopenharmony_cistatic struct radv_tessellation_info 1608bf215546Sopenharmony_ciradv_pipeline_init_tessellation_info(struct radv_graphics_pipeline *pipeline, 1609bf215546Sopenharmony_ci const 
VkGraphicsPipelineCreateInfo *pCreateInfo) 1610bf215546Sopenharmony_ci{ 1611bf215546Sopenharmony_ci const VkPipelineTessellationStateCreateInfo *ts = pCreateInfo->pTessellationState; 1612bf215546Sopenharmony_ci const VkShaderStageFlagBits tess_stages = VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | 1613bf215546Sopenharmony_ci VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; 1614bf215546Sopenharmony_ci struct radv_tessellation_info info = {0}; 1615bf215546Sopenharmony_ci 1616bf215546Sopenharmony_ci if ((pipeline->active_stages & tess_stages) == tess_stages) { 1617bf215546Sopenharmony_ci info.patch_control_points = ts->patchControlPoints; 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_ci const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state = 1620bf215546Sopenharmony_ci vk_find_struct_const(ts->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); 1621bf215546Sopenharmony_ci if (domain_origin_state) { 1622bf215546Sopenharmony_ci info.domain_origin = domain_origin_state->domainOrigin; 1623bf215546Sopenharmony_ci } 1624bf215546Sopenharmony_ci } 1625bf215546Sopenharmony_ci 1626bf215546Sopenharmony_ci return info; 1627bf215546Sopenharmony_ci} 1628bf215546Sopenharmony_ci 1629bf215546Sopenharmony_cistatic struct radv_viewport_info 1630bf215546Sopenharmony_ciradv_pipeline_init_viewport_info(struct radv_graphics_pipeline *pipeline, 1631bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1632bf215546Sopenharmony_ci{ 1633bf215546Sopenharmony_ci const VkPipelineViewportStateCreateInfo *vp = pCreateInfo->pViewportState; 1634bf215546Sopenharmony_ci struct radv_viewport_info info = {0}; 1635bf215546Sopenharmony_ci 1636bf215546Sopenharmony_ci if (radv_is_raster_enabled(pipeline, pCreateInfo)) { 1637bf215546Sopenharmony_ci if (!(pipeline->dynamic_states & RADV_DYNAMIC_VIEWPORT)) { 1638bf215546Sopenharmony_ci typed_memcpy(info.viewports, vp->pViewports, vp->viewportCount); 1639bf215546Sopenharmony_ci } 1640bf215546Sopenharmony_ci 
info.viewport_count = vp->viewportCount; 1641bf215546Sopenharmony_ci 1642bf215546Sopenharmony_ci if (!(pipeline->dynamic_states & RADV_DYNAMIC_SCISSOR)) { 1643bf215546Sopenharmony_ci typed_memcpy(info.scissors, vp->pScissors, vp->scissorCount); 1644bf215546Sopenharmony_ci } 1645bf215546Sopenharmony_ci info.scissor_count = vp->scissorCount; 1646bf215546Sopenharmony_ci 1647bf215546Sopenharmony_ci const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_control = 1648bf215546Sopenharmony_ci vk_find_struct_const(vp->pNext, PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT); 1649bf215546Sopenharmony_ci if (depth_clip_control) { 1650bf215546Sopenharmony_ci info.negative_one_to_one = !!depth_clip_control->negativeOneToOne; 1651bf215546Sopenharmony_ci } 1652bf215546Sopenharmony_ci } 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_ci return info; 1655bf215546Sopenharmony_ci} 1656bf215546Sopenharmony_ci 1657bf215546Sopenharmony_cistatic struct radv_rasterization_info 1658bf215546Sopenharmony_ciradv_pipeline_init_rasterization_info(struct radv_graphics_pipeline *pipeline, 1659bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1660bf215546Sopenharmony_ci{ 1661bf215546Sopenharmony_ci const VkPipelineRasterizationStateCreateInfo *rs = pCreateInfo->pRasterizationState; 1662bf215546Sopenharmony_ci struct radv_rasterization_info info = {0}; 1663bf215546Sopenharmony_ci 1664bf215546Sopenharmony_ci info.discard_enable = rs->rasterizerDiscardEnable; 1665bf215546Sopenharmony_ci info.front_face = rs->frontFace; 1666bf215546Sopenharmony_ci info.cull_mode = rs->cullMode; 1667bf215546Sopenharmony_ci info.polygon_mode = si_translate_fill(rs->polygonMode); 1668bf215546Sopenharmony_ci info.depth_bias_enable = rs->depthBiasEnable; 1669bf215546Sopenharmony_ci info.depth_clamp_enable = rs->depthClampEnable; 1670bf215546Sopenharmony_ci info.line_width = rs->lineWidth; 1671bf215546Sopenharmony_ci info.depth_bias_constant_factor = rs->depthBiasConstantFactor; 
1672bf215546Sopenharmony_ci info.depth_bias_clamp = rs->depthBiasClamp; 1673bf215546Sopenharmony_ci info.depth_bias_slope_factor = rs->depthBiasSlopeFactor; 1674bf215546Sopenharmony_ci info.depth_clip_disable = rs->depthClampEnable; 1675bf215546Sopenharmony_ci 1676bf215546Sopenharmony_ci const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_info = 1677bf215546Sopenharmony_ci vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT); 1678bf215546Sopenharmony_ci if (provoking_vtx_info && 1679bf215546Sopenharmony_ci provoking_vtx_info->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT) { 1680bf215546Sopenharmony_ci info.provoking_vtx_last = true; 1681bf215546Sopenharmony_ci } 1682bf215546Sopenharmony_ci 1683bf215546Sopenharmony_ci const VkPipelineRasterizationConservativeStateCreateInfoEXT *conservative_raster = 1684bf215546Sopenharmony_ci vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_CONSERVATIVE_STATE_CREATE_INFO_EXT); 1685bf215546Sopenharmony_ci if (conservative_raster) { 1686bf215546Sopenharmony_ci info.conservative_mode = conservative_raster->conservativeRasterizationMode; 1687bf215546Sopenharmony_ci } 1688bf215546Sopenharmony_ci 1689bf215546Sopenharmony_ci const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_info = 1690bf215546Sopenharmony_ci vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); 1691bf215546Sopenharmony_ci if (rast_line_info) { 1692bf215546Sopenharmony_ci info.stippled_line_enable = rast_line_info->stippledLineEnable; 1693bf215546Sopenharmony_ci info.line_raster_mode = rast_line_info->lineRasterizationMode; 1694bf215546Sopenharmony_ci info.line_stipple_factor = rast_line_info->lineStippleFactor; 1695bf215546Sopenharmony_ci info.line_stipple_pattern = rast_line_info->lineStipplePattern; 1696bf215546Sopenharmony_ci } 1697bf215546Sopenharmony_ci 1698bf215546Sopenharmony_ci const 
VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state = 1699bf215546Sopenharmony_ci vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT); 1700bf215546Sopenharmony_ci if (depth_clip_state) { 1701bf215546Sopenharmony_ci info.depth_clip_disable = !depth_clip_state->depthClipEnable; 1702bf215546Sopenharmony_ci } 1703bf215546Sopenharmony_ci 1704bf215546Sopenharmony_ci const VkPipelineRasterizationStateRasterizationOrderAMD *raster_order = 1705bf215546Sopenharmony_ci vk_find_struct_const(rs->pNext, PIPELINE_RASTERIZATION_STATE_RASTERIZATION_ORDER_AMD); 1706bf215546Sopenharmony_ci if (raster_order) { 1707bf215546Sopenharmony_ci info.order = raster_order->rasterizationOrder; 1708bf215546Sopenharmony_ci } 1709bf215546Sopenharmony_ci 1710bf215546Sopenharmony_ci return info; 1711bf215546Sopenharmony_ci} 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_cistatic struct radv_discard_rectangle_info 1714bf215546Sopenharmony_ciradv_pipeline_init_discard_rectangle_info(struct radv_graphics_pipeline *pipeline, 1715bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1716bf215546Sopenharmony_ci{ 1717bf215546Sopenharmony_ci const VkPipelineDiscardRectangleStateCreateInfoEXT *discard_rectangle_info = 1718bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, PIPELINE_DISCARD_RECTANGLE_STATE_CREATE_INFO_EXT); 1719bf215546Sopenharmony_ci struct radv_discard_rectangle_info info = {0}; 1720bf215546Sopenharmony_ci 1721bf215546Sopenharmony_ci if (discard_rectangle_info) { 1722bf215546Sopenharmony_ci info.mode = discard_rectangle_info->discardRectangleMode; 1723bf215546Sopenharmony_ci if (!(pipeline->dynamic_states & RADV_DYNAMIC_DISCARD_RECTANGLE)) { 1724bf215546Sopenharmony_ci typed_memcpy(info.rects, discard_rectangle_info->pDiscardRectangles, 1725bf215546Sopenharmony_ci discard_rectangle_info->discardRectangleCount); 1726bf215546Sopenharmony_ci } 1727bf215546Sopenharmony_ci info.count = 
discard_rectangle_info->discardRectangleCount; 1728bf215546Sopenharmony_ci } 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci return info; 1731bf215546Sopenharmony_ci} 1732bf215546Sopenharmony_ci 1733bf215546Sopenharmony_cistatic struct radv_multisample_info 1734bf215546Sopenharmony_ciradv_pipeline_init_multisample_info(struct radv_graphics_pipeline *pipeline, 1735bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1736bf215546Sopenharmony_ci{ 1737bf215546Sopenharmony_ci const VkPipelineMultisampleStateCreateInfo *ms = pCreateInfo->pMultisampleState; 1738bf215546Sopenharmony_ci struct radv_multisample_info info = {0}; 1739bf215546Sopenharmony_ci 1740bf215546Sopenharmony_ci if (radv_is_raster_enabled(pipeline, pCreateInfo)) { 1741bf215546Sopenharmony_ci info.raster_samples = ms->rasterizationSamples; 1742bf215546Sopenharmony_ci info.sample_shading_enable = ms->sampleShadingEnable; 1743bf215546Sopenharmony_ci info.min_sample_shading = ms->minSampleShading; 1744bf215546Sopenharmony_ci info.alpha_to_coverage_enable = ms->alphaToCoverageEnable; 1745bf215546Sopenharmony_ci if (ms->pSampleMask) { 1746bf215546Sopenharmony_ci info.sample_mask = ms->pSampleMask[0] & 0xffff; 1747bf215546Sopenharmony_ci } else { 1748bf215546Sopenharmony_ci info.sample_mask = 0xffff; 1749bf215546Sopenharmony_ci } 1750bf215546Sopenharmony_ci 1751bf215546Sopenharmony_ci const VkPipelineSampleLocationsStateCreateInfoEXT *sample_location_info = 1752bf215546Sopenharmony_ci vk_find_struct_const(ms->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); 1753bf215546Sopenharmony_ci if (sample_location_info) { 1754bf215546Sopenharmony_ci /* If sampleLocationsEnable is VK_FALSE, the default sample locations are used and the 1755bf215546Sopenharmony_ci * values specified in sampleLocationsInfo are ignored. 
1756bf215546Sopenharmony_ci */ 1757bf215546Sopenharmony_ci info.sample_locs_enable = sample_location_info->sampleLocationsEnable; 1758bf215546Sopenharmony_ci if (sample_location_info->sampleLocationsEnable) { 1759bf215546Sopenharmony_ci const VkSampleLocationsInfoEXT *pSampleLocationsInfo = 1760bf215546Sopenharmony_ci &sample_location_info->sampleLocationsInfo; 1761bf215546Sopenharmony_ci assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS); 1762bf215546Sopenharmony_ci 1763bf215546Sopenharmony_ci info.sample_locs_per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel; 1764bf215546Sopenharmony_ci info.sample_locs_grid_size = pSampleLocationsInfo->sampleLocationGridSize; 1765bf215546Sopenharmony_ci for (uint32_t i = 0; i < pSampleLocationsInfo->sampleLocationsCount; i++) { 1766bf215546Sopenharmony_ci info.sample_locs[i] = pSampleLocationsInfo->pSampleLocations[i]; 1767bf215546Sopenharmony_ci } 1768bf215546Sopenharmony_ci info.sample_locs_count = pSampleLocationsInfo->sampleLocationsCount; 1769bf215546Sopenharmony_ci } 1770bf215546Sopenharmony_ci } 1771bf215546Sopenharmony_ci } else { 1772bf215546Sopenharmony_ci info.raster_samples = VK_SAMPLE_COUNT_1_BIT; 1773bf215546Sopenharmony_ci } 1774bf215546Sopenharmony_ci 1775bf215546Sopenharmony_ci return info; 1776bf215546Sopenharmony_ci} 1777bf215546Sopenharmony_ci 1778bf215546Sopenharmony_cistatic struct radv_depth_stencil_info 1779bf215546Sopenharmony_ciradv_pipeline_init_depth_stencil_info(struct radv_graphics_pipeline *pipeline, 1780bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1781bf215546Sopenharmony_ci{ 1782bf215546Sopenharmony_ci const VkPipelineDepthStencilStateCreateInfo *ds = pCreateInfo->pDepthStencilState; 1783bf215546Sopenharmony_ci const VkPipelineRenderingCreateInfo *ri = 1784bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RENDERING_CREATE_INFO); 1785bf215546Sopenharmony_ci struct radv_depth_stencil_info info = {0}; 
1786bf215546Sopenharmony_ci 1787bf215546Sopenharmony_ci if (radv_is_raster_enabled(pipeline, pCreateInfo) && 1788bf215546Sopenharmony_ci (ri->depthAttachmentFormat != VK_FORMAT_UNDEFINED || 1789bf215546Sopenharmony_ci ri->stencilAttachmentFormat != VK_FORMAT_UNDEFINED)) { 1790bf215546Sopenharmony_ci info.depth_bounds_test_enable = ds->depthBoundsTestEnable; 1791bf215546Sopenharmony_ci info.depth_bounds.min = ds->minDepthBounds; 1792bf215546Sopenharmony_ci info.depth_bounds.max = ds->maxDepthBounds; 1793bf215546Sopenharmony_ci info.stencil_test_enable = ds->stencilTestEnable; 1794bf215546Sopenharmony_ci info.front.fail_op = ds->front.failOp; 1795bf215546Sopenharmony_ci info.front.pass_op = ds->front.passOp; 1796bf215546Sopenharmony_ci info.front.depth_fail_op = ds->front.depthFailOp; 1797bf215546Sopenharmony_ci info.front.compare_op = ds->front.compareOp; 1798bf215546Sopenharmony_ci info.front.compare_mask = ds->front.compareMask; 1799bf215546Sopenharmony_ci info.front.write_mask = ds->front.writeMask; 1800bf215546Sopenharmony_ci info.front.reference = ds->front.reference; 1801bf215546Sopenharmony_ci info.back.fail_op = ds->back.failOp; 1802bf215546Sopenharmony_ci info.back.pass_op = ds->back.passOp; 1803bf215546Sopenharmony_ci info.back.depth_fail_op = ds->back.depthFailOp; 1804bf215546Sopenharmony_ci info.back.compare_op = ds->back.compareOp; 1805bf215546Sopenharmony_ci info.back.compare_mask = ds->back.compareMask; 1806bf215546Sopenharmony_ci info.back.write_mask = ds->back.writeMask; 1807bf215546Sopenharmony_ci info.back.reference = ds->back.reference; 1808bf215546Sopenharmony_ci info.depth_test_enable = ds->depthTestEnable; 1809bf215546Sopenharmony_ci info.depth_write_enable = ds->depthWriteEnable; 1810bf215546Sopenharmony_ci info.depth_compare_op = ds->depthCompareOp; 1811bf215546Sopenharmony_ci } 1812bf215546Sopenharmony_ci 1813bf215546Sopenharmony_ci return info; 1814bf215546Sopenharmony_ci} 1815bf215546Sopenharmony_ci 1816bf215546Sopenharmony_cistatic 
struct radv_rendering_info 1817bf215546Sopenharmony_ciradv_pipeline_init_rendering_info(struct radv_graphics_pipeline *pipeline, 1818bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1819bf215546Sopenharmony_ci{ 1820bf215546Sopenharmony_ci const VkPipelineRenderingCreateInfo *ri = 1821bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RENDERING_CREATE_INFO); 1822bf215546Sopenharmony_ci struct radv_rendering_info info = {0}; 1823bf215546Sopenharmony_ci 1824bf215546Sopenharmony_ci info.view_mask = ri->viewMask; 1825bf215546Sopenharmony_ci for (uint32_t i = 0; i < ri->colorAttachmentCount; i++) { 1826bf215546Sopenharmony_ci info.color_att_formats[i] = ri->pColorAttachmentFormats[i]; 1827bf215546Sopenharmony_ci } 1828bf215546Sopenharmony_ci info.color_att_count = ri->colorAttachmentCount; 1829bf215546Sopenharmony_ci info.depth_att_format = ri->depthAttachmentFormat; 1830bf215546Sopenharmony_ci info.stencil_att_format = ri->stencilAttachmentFormat; 1831bf215546Sopenharmony_ci 1832bf215546Sopenharmony_ci return info; 1833bf215546Sopenharmony_ci} 1834bf215546Sopenharmony_ci 1835bf215546Sopenharmony_cistatic struct radv_color_blend_info 1836bf215546Sopenharmony_ciradv_pipeline_init_color_blend_info(struct radv_graphics_pipeline *pipeline, 1837bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1838bf215546Sopenharmony_ci{ 1839bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 1840bf215546Sopenharmony_ci const VkPipelineColorBlendStateCreateInfo *cb = pCreateInfo->pColorBlendState; 1841bf215546Sopenharmony_ci const VkPipelineRenderingCreateInfo *ri = 1842bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, PIPELINE_RENDERING_CREATE_INFO); 1843bf215546Sopenharmony_ci struct radv_color_blend_info info = {0}; 1844bf215546Sopenharmony_ci bool has_color_att = false; 1845bf215546Sopenharmony_ci 1846bf215546Sopenharmony_ci for (uint32_t i = 0; i 
< ri->colorAttachmentCount; ++i) { 1847bf215546Sopenharmony_ci if (ri->pColorAttachmentFormats[i] != VK_FORMAT_UNDEFINED) { 1848bf215546Sopenharmony_ci has_color_att = true; 1849bf215546Sopenharmony_ci break; 1850bf215546Sopenharmony_ci } 1851bf215546Sopenharmony_ci } 1852bf215546Sopenharmony_ci 1853bf215546Sopenharmony_ci if (radv_is_raster_enabled(pipeline, pCreateInfo) && has_color_att) { 1854bf215546Sopenharmony_ci for (uint32_t i = 0; i < cb->attachmentCount; i++) { 1855bf215546Sopenharmony_ci const VkPipelineColorBlendAttachmentState *att = &cb->pAttachments[i]; 1856bf215546Sopenharmony_ci 1857bf215546Sopenharmony_ci info.att[i].color_write_mask = att->colorWriteMask; 1858bf215546Sopenharmony_ci info.att[i].blend_enable = att->blendEnable; 1859bf215546Sopenharmony_ci info.att[i].color_blend_op = si_translate_blend_function(att->colorBlendOp); 1860bf215546Sopenharmony_ci info.att[i].alpha_blend_op = si_translate_blend_function(att->alphaBlendOp); 1861bf215546Sopenharmony_ci info.att[i].src_color_blend_factor = 1862bf215546Sopenharmony_ci si_translate_blend_factor(pdevice->rad_info.gfx_level, att->srcColorBlendFactor); 1863bf215546Sopenharmony_ci info.att[i].dst_color_blend_factor = 1864bf215546Sopenharmony_ci si_translate_blend_factor(pdevice->rad_info.gfx_level, att->dstColorBlendFactor); 1865bf215546Sopenharmony_ci info.att[i].src_alpha_blend_factor = 1866bf215546Sopenharmony_ci si_translate_blend_factor(pdevice->rad_info.gfx_level, att->srcAlphaBlendFactor); 1867bf215546Sopenharmony_ci info.att[i].dst_alpha_blend_factor = 1868bf215546Sopenharmony_ci si_translate_blend_factor(pdevice->rad_info.gfx_level, att->dstAlphaBlendFactor); 1869bf215546Sopenharmony_ci } 1870bf215546Sopenharmony_ci info.att_count = cb->attachmentCount; 1871bf215546Sopenharmony_ci 1872bf215546Sopenharmony_ci for (uint32_t i = 0; i < 4; i++) { 1873bf215546Sopenharmony_ci info.blend_constants[i] = cb->blendConstants[i]; 1874bf215546Sopenharmony_ci } 1875bf215546Sopenharmony_ci 
1876bf215546Sopenharmony_ci info.logic_op_enable = cb->logicOpEnable; 1877bf215546Sopenharmony_ci if (info.logic_op_enable) 1878bf215546Sopenharmony_ci info.logic_op = si_translate_blend_logic_op(cb->logicOp); 1879bf215546Sopenharmony_ci 1880bf215546Sopenharmony_ci const VkPipelineColorWriteCreateInfoEXT *color_write_info = 1881bf215546Sopenharmony_ci vk_find_struct_const(cb->pNext, PIPELINE_COLOR_WRITE_CREATE_INFO_EXT); 1882bf215546Sopenharmony_ci if (color_write_info) { 1883bf215546Sopenharmony_ci for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) { 1884bf215546Sopenharmony_ci info.color_write_enable |= 1885bf215546Sopenharmony_ci color_write_info->pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0; 1886bf215546Sopenharmony_ci } 1887bf215546Sopenharmony_ci } else { 1888bf215546Sopenharmony_ci info.color_write_enable = 0xffffffffu; 1889bf215546Sopenharmony_ci } 1890bf215546Sopenharmony_ci } 1891bf215546Sopenharmony_ci 1892bf215546Sopenharmony_ci return info; 1893bf215546Sopenharmony_ci} 1894bf215546Sopenharmony_ci 1895bf215546Sopenharmony_cistatic struct radv_fragment_shading_rate_info 1896bf215546Sopenharmony_ciradv_pipeline_init_fragment_shading_rate_info(struct radv_graphics_pipeline *pipeline, 1897bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1898bf215546Sopenharmony_ci{ 1899bf215546Sopenharmony_ci const VkPipelineFragmentShadingRateStateCreateInfoKHR *shading_rate = 1900bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR); 1901bf215546Sopenharmony_ci struct radv_fragment_shading_rate_info info = {0}; 1902bf215546Sopenharmony_ci 1903bf215546Sopenharmony_ci if (shading_rate && !(pipeline->dynamic_states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE)) { 1904bf215546Sopenharmony_ci info.size = shading_rate->fragmentSize; 1905bf215546Sopenharmony_ci for (int i = 0; i < 2; i++) 1906bf215546Sopenharmony_ci info.combiner_ops[i] = shading_rate->combinerOps[i]; 
1907bf215546Sopenharmony_ci } else { 1908bf215546Sopenharmony_ci info.size = (VkExtent2D){ 1, 1 }; 1909bf215546Sopenharmony_ci info.combiner_ops[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; 1910bf215546Sopenharmony_ci info.combiner_ops[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; 1911bf215546Sopenharmony_ci } 1912bf215546Sopenharmony_ci 1913bf215546Sopenharmony_ci return info; 1914bf215546Sopenharmony_ci} 1915bf215546Sopenharmony_ci 1916bf215546Sopenharmony_cistatic struct radv_graphics_pipeline_info 1917bf215546Sopenharmony_ciradv_pipeline_init_graphics_info(struct radv_graphics_pipeline *pipeline, 1918bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo) 1919bf215546Sopenharmony_ci{ 1920bf215546Sopenharmony_ci struct radv_graphics_pipeline_info info = {0}; 1921bf215546Sopenharmony_ci 1922bf215546Sopenharmony_ci /* Vertex input interface structs have to be ignored if the pipeline includes a mesh shader. */ 1923bf215546Sopenharmony_ci if (!(pipeline->active_stages & VK_SHADER_STAGE_MESH_BIT_NV)) { 1924bf215546Sopenharmony_ci info.vi = radv_pipeline_init_vertex_input_info(pipeline, pCreateInfo); 1925bf215546Sopenharmony_ci info.ia = radv_pipeline_init_input_assembly_info(pipeline, pCreateInfo); 1926bf215546Sopenharmony_ci } 1927bf215546Sopenharmony_ci 1928bf215546Sopenharmony_ci info.ts = radv_pipeline_init_tessellation_info(pipeline, pCreateInfo); 1929bf215546Sopenharmony_ci info.vp = radv_pipeline_init_viewport_info(pipeline, pCreateInfo); 1930bf215546Sopenharmony_ci info.rs = radv_pipeline_init_rasterization_info(pipeline, pCreateInfo); 1931bf215546Sopenharmony_ci info.dr = radv_pipeline_init_discard_rectangle_info(pipeline, pCreateInfo); 1932bf215546Sopenharmony_ci 1933bf215546Sopenharmony_ci info.ms = radv_pipeline_init_multisample_info(pipeline, pCreateInfo); 1934bf215546Sopenharmony_ci info.ds = radv_pipeline_init_depth_stencil_info(pipeline, pCreateInfo); 1935bf215546Sopenharmony_ci info.ri = 
radv_pipeline_init_rendering_info(pipeline, pCreateInfo); 1936bf215546Sopenharmony_ci info.cb = radv_pipeline_init_color_blend_info(pipeline, pCreateInfo); 1937bf215546Sopenharmony_ci 1938bf215546Sopenharmony_ci info.fsr = radv_pipeline_init_fragment_shading_rate_info(pipeline, pCreateInfo); 1939bf215546Sopenharmony_ci 1940bf215546Sopenharmony_ci /* VK_AMD_mixed_attachment_samples */ 1941bf215546Sopenharmony_ci const VkAttachmentSampleCountInfoAMD *sample_info = 1942bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, ATTACHMENT_SAMPLE_COUNT_INFO_AMD); 1943bf215546Sopenharmony_ci if (sample_info) { 1944bf215546Sopenharmony_ci for (uint32_t i = 0; i < sample_info->colorAttachmentCount; ++i) { 1945bf215546Sopenharmony_ci if (info.ri.color_att_formats[i] != VK_FORMAT_UNDEFINED) { 1946bf215546Sopenharmony_ci info.color_att_samples = MAX2(info.color_att_samples, sample_info->pColorAttachmentSamples[i]); 1947bf215546Sopenharmony_ci } 1948bf215546Sopenharmony_ci } 1949bf215546Sopenharmony_ci info.ds_att_samples = sample_info->depthStencilAttachmentSamples; 1950bf215546Sopenharmony_ci } 1951bf215546Sopenharmony_ci 1952bf215546Sopenharmony_ci return info; 1953bf215546Sopenharmony_ci} 1954bf215546Sopenharmony_ci 1955bf215546Sopenharmony_cistatic void 1956bf215546Sopenharmony_ciradv_pipeline_init_input_assembly_state(struct radv_graphics_pipeline *pipeline, 1957bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 1958bf215546Sopenharmony_ci{ 1959bf215546Sopenharmony_ci pipeline->ia_multi_vgt_param = radv_compute_ia_multi_vgt_param_helpers(pipeline); 1960bf215546Sopenharmony_ci} 1961bf215546Sopenharmony_ci 1962bf215546Sopenharmony_cistatic void 1963bf215546Sopenharmony_ciradv_pipeline_init_dynamic_state(struct radv_graphics_pipeline *pipeline, 1964bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 1965bf215546Sopenharmony_ci{ 1966bf215546Sopenharmony_ci uint64_t needed_states = radv_pipeline_needed_dynamic_state(pipeline, 
info); 1967bf215546Sopenharmony_ci uint64_t states = needed_states; 1968bf215546Sopenharmony_ci 1969bf215546Sopenharmony_ci pipeline->dynamic_state = default_dynamic_state; 1970bf215546Sopenharmony_ci pipeline->needed_dynamic_state = needed_states; 1971bf215546Sopenharmony_ci 1972bf215546Sopenharmony_ci states &= ~pipeline->dynamic_states; 1973bf215546Sopenharmony_ci 1974bf215546Sopenharmony_ci struct radv_dynamic_state *dynamic = &pipeline->dynamic_state; 1975bf215546Sopenharmony_ci 1976bf215546Sopenharmony_ci if (needed_states & RADV_DYNAMIC_VIEWPORT) { 1977bf215546Sopenharmony_ci dynamic->viewport.count = info->vp.viewport_count; 1978bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_VIEWPORT) { 1979bf215546Sopenharmony_ci typed_memcpy(dynamic->viewport.viewports, info->vp.viewports, info->vp.viewport_count); 1980bf215546Sopenharmony_ci for (unsigned i = 0; i < dynamic->viewport.count; i++) 1981bf215546Sopenharmony_ci radv_get_viewport_xform(&dynamic->viewport.viewports[i], 1982bf215546Sopenharmony_ci dynamic->viewport.xform[i].scale, dynamic->viewport.xform[i].translate); 1983bf215546Sopenharmony_ci } 1984bf215546Sopenharmony_ci } 1985bf215546Sopenharmony_ci 1986bf215546Sopenharmony_ci if (needed_states & RADV_DYNAMIC_SCISSOR) { 1987bf215546Sopenharmony_ci dynamic->scissor.count = info->vp.scissor_count; 1988bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_SCISSOR) { 1989bf215546Sopenharmony_ci typed_memcpy(dynamic->scissor.scissors, info->vp.scissors, info->vp.scissor_count); 1990bf215546Sopenharmony_ci } 1991bf215546Sopenharmony_ci } 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_LINE_WIDTH) { 1994bf215546Sopenharmony_ci dynamic->line_width = info->rs.line_width; 1995bf215546Sopenharmony_ci } 1996bf215546Sopenharmony_ci 1997bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_DEPTH_BIAS) { 1998bf215546Sopenharmony_ci dynamic->depth_bias.bias = info->rs.depth_bias_constant_factor; 1999bf215546Sopenharmony_ci 
dynamic->depth_bias.clamp = info->rs.depth_bias_clamp; 2000bf215546Sopenharmony_ci dynamic->depth_bias.slope = info->rs.depth_bias_slope_factor; 2001bf215546Sopenharmony_ci } 2002bf215546Sopenharmony_ci 2003bf215546Sopenharmony_ci /* Section 9.2 of the Vulkan 1.0.15 spec says: 2004bf215546Sopenharmony_ci * 2005bf215546Sopenharmony_ci * pColorBlendState is [...] NULL if the pipeline has rasterization 2006bf215546Sopenharmony_ci * disabled or if the subpass of the render pass the pipeline is 2007bf215546Sopenharmony_ci * created against does not use any color attachments. 2008bf215546Sopenharmony_ci */ 2009bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_BLEND_CONSTANTS) { 2010bf215546Sopenharmony_ci typed_memcpy(dynamic->blend_constants, info->cb.blend_constants, 4); 2011bf215546Sopenharmony_ci } 2012bf215546Sopenharmony_ci 2013bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_CULL_MODE) { 2014bf215546Sopenharmony_ci dynamic->cull_mode = info->rs.cull_mode; 2015bf215546Sopenharmony_ci } 2016bf215546Sopenharmony_ci 2017bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_FRONT_FACE) { 2018bf215546Sopenharmony_ci dynamic->front_face = info->rs.front_face; 2019bf215546Sopenharmony_ci } 2020bf215546Sopenharmony_ci 2021bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) { 2022bf215546Sopenharmony_ci dynamic->primitive_topology = info->ia.primitive_topology; 2023bf215546Sopenharmony_ci } 2024bf215546Sopenharmony_ci 2025bf215546Sopenharmony_ci /* If there is no depthstencil attachment, then don't read 2026bf215546Sopenharmony_ci * pDepthStencilState. The Vulkan spec states that pDepthStencilState may 2027bf215546Sopenharmony_ci * be NULL in this case. Even if pDepthStencilState is non-NULL, there is 2028bf215546Sopenharmony_ci * no need to override the depthstencil defaults in 2029bf215546Sopenharmony_ci * radv_pipeline::dynamic_state when there is no depthstencil attachment. 
2030bf215546Sopenharmony_ci * 2031bf215546Sopenharmony_ci * Section 9.2 of the Vulkan 1.0.15 spec says: 2032bf215546Sopenharmony_ci * 2033bf215546Sopenharmony_ci * pDepthStencilState is [...] NULL if the pipeline has rasterization 2034bf215546Sopenharmony_ci * disabled or if the subpass of the render pass the pipeline is created 2035bf215546Sopenharmony_ci * against does not use a depth/stencil attachment. 2036bf215546Sopenharmony_ci */ 2037bf215546Sopenharmony_ci if (needed_states && radv_pipeline_has_ds_attachments(&info->ri)) { 2038bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_DEPTH_BOUNDS) { 2039bf215546Sopenharmony_ci dynamic->depth_bounds.min = info->ds.depth_bounds.min; 2040bf215546Sopenharmony_ci dynamic->depth_bounds.max = info->ds.depth_bounds.max; 2041bf215546Sopenharmony_ci } 2042bf215546Sopenharmony_ci 2043bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_STENCIL_COMPARE_MASK) { 2044bf215546Sopenharmony_ci dynamic->stencil_compare_mask.front = info->ds.front.compare_mask; 2045bf215546Sopenharmony_ci dynamic->stencil_compare_mask.back = info->ds.back.compare_mask; 2046bf215546Sopenharmony_ci } 2047bf215546Sopenharmony_ci 2048bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_STENCIL_WRITE_MASK) { 2049bf215546Sopenharmony_ci dynamic->stencil_write_mask.front = info->ds.front.write_mask; 2050bf215546Sopenharmony_ci dynamic->stencil_write_mask.back = info->ds.back.write_mask; 2051bf215546Sopenharmony_ci } 2052bf215546Sopenharmony_ci 2053bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_STENCIL_REFERENCE) { 2054bf215546Sopenharmony_ci dynamic->stencil_reference.front = info->ds.front.reference; 2055bf215546Sopenharmony_ci dynamic->stencil_reference.back = info->ds.back.reference; 2056bf215546Sopenharmony_ci } 2057bf215546Sopenharmony_ci 2058bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_DEPTH_TEST_ENABLE) { 2059bf215546Sopenharmony_ci dynamic->depth_test_enable = info->ds.depth_test_enable; 2060bf215546Sopenharmony_ci } 2061bf215546Sopenharmony_ci 
2062bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) { 2063bf215546Sopenharmony_ci dynamic->depth_write_enable = info->ds.depth_write_enable; 2064bf215546Sopenharmony_ci } 2065bf215546Sopenharmony_ci 2066bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_DEPTH_COMPARE_OP) { 2067bf215546Sopenharmony_ci dynamic->depth_compare_op = info->ds.depth_compare_op; 2068bf215546Sopenharmony_ci } 2069bf215546Sopenharmony_ci 2070bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) { 2071bf215546Sopenharmony_ci dynamic->depth_bounds_test_enable = info->ds.depth_bounds_test_enable; 2072bf215546Sopenharmony_ci } 2073bf215546Sopenharmony_ci 2074bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_STENCIL_TEST_ENABLE) { 2075bf215546Sopenharmony_ci dynamic->stencil_test_enable = info->ds.stencil_test_enable; 2076bf215546Sopenharmony_ci } 2077bf215546Sopenharmony_ci 2078bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_STENCIL_OP) { 2079bf215546Sopenharmony_ci dynamic->stencil_op.front.compare_op = info->ds.front.compare_op; 2080bf215546Sopenharmony_ci dynamic->stencil_op.front.fail_op = info->ds.front.fail_op; 2081bf215546Sopenharmony_ci dynamic->stencil_op.front.pass_op = info->ds.front.pass_op; 2082bf215546Sopenharmony_ci dynamic->stencil_op.front.depth_fail_op = info->ds.front.depth_fail_op; 2083bf215546Sopenharmony_ci 2084bf215546Sopenharmony_ci dynamic->stencil_op.back.compare_op = info->ds.back.compare_op; 2085bf215546Sopenharmony_ci dynamic->stencil_op.back.fail_op = info->ds.back.fail_op; 2086bf215546Sopenharmony_ci dynamic->stencil_op.back.pass_op = info->ds.back.pass_op; 2087bf215546Sopenharmony_ci dynamic->stencil_op.back.depth_fail_op = info->ds.back.depth_fail_op; 2088bf215546Sopenharmony_ci } 2089bf215546Sopenharmony_ci } 2090bf215546Sopenharmony_ci 2091bf215546Sopenharmony_ci if (needed_states & RADV_DYNAMIC_DISCARD_RECTANGLE) { 2092bf215546Sopenharmony_ci dynamic->discard_rectangle.count = info->dr.count; 
2093bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_DISCARD_RECTANGLE) { 2094bf215546Sopenharmony_ci typed_memcpy(dynamic->discard_rectangle.rectangles, info->dr.rects, info->dr.count); 2095bf215546Sopenharmony_ci } 2096bf215546Sopenharmony_ci } 2097bf215546Sopenharmony_ci 2098bf215546Sopenharmony_ci if (needed_states & RADV_DYNAMIC_SAMPLE_LOCATIONS) { 2099bf215546Sopenharmony_ci if (info->ms.sample_locs_enable) { 2100bf215546Sopenharmony_ci dynamic->sample_location.per_pixel = info->ms.sample_locs_per_pixel; 2101bf215546Sopenharmony_ci dynamic->sample_location.grid_size = info->ms.sample_locs_grid_size; 2102bf215546Sopenharmony_ci dynamic->sample_location.count = info->ms.sample_locs_count; 2103bf215546Sopenharmony_ci typed_memcpy(&dynamic->sample_location.locations[0], info->ms.sample_locs, 2104bf215546Sopenharmony_ci info->ms.sample_locs_count); 2105bf215546Sopenharmony_ci } 2106bf215546Sopenharmony_ci } 2107bf215546Sopenharmony_ci 2108bf215546Sopenharmony_ci if (needed_states & RADV_DYNAMIC_LINE_STIPPLE) { 2109bf215546Sopenharmony_ci dynamic->line_stipple.factor = info->rs.line_stipple_factor; 2110bf215546Sopenharmony_ci dynamic->line_stipple.pattern = info->rs.line_stipple_pattern; 2111bf215546Sopenharmony_ci } 2112bf215546Sopenharmony_ci 2113bf215546Sopenharmony_ci if (!(states & RADV_DYNAMIC_VERTEX_INPUT_BINDING_STRIDE) || 2114bf215546Sopenharmony_ci !(states & RADV_DYNAMIC_VERTEX_INPUT)) 2115bf215546Sopenharmony_ci pipeline->uses_dynamic_stride = true; 2116bf215546Sopenharmony_ci 2117bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) { 2118bf215546Sopenharmony_ci dynamic->fragment_shading_rate.size = info->fsr.size; 2119bf215546Sopenharmony_ci for (int i = 0; i < 2; i++) 2120bf215546Sopenharmony_ci dynamic->fragment_shading_rate.combiner_ops[i] = info->fsr.combiner_ops[i]; 2121bf215546Sopenharmony_ci } 2122bf215546Sopenharmony_ci 2123bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_DEPTH_BIAS_ENABLE) { 2124bf215546Sopenharmony_ci 
dynamic->depth_bias_enable = info->rs.depth_bias_enable; 2125bf215546Sopenharmony_ci } 2126bf215546Sopenharmony_ci 2127bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE) { 2128bf215546Sopenharmony_ci dynamic->primitive_restart_enable = info->ia.primitive_restart_enable; 2129bf215546Sopenharmony_ci } 2130bf215546Sopenharmony_ci 2131bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE) { 2132bf215546Sopenharmony_ci dynamic->rasterizer_discard_enable = info->rs.discard_enable; 2133bf215546Sopenharmony_ci } 2134bf215546Sopenharmony_ci 2135bf215546Sopenharmony_ci if (radv_pipeline_has_color_attachments(&info->ri) && states & RADV_DYNAMIC_LOGIC_OP) { 2136bf215546Sopenharmony_ci if (info->cb.logic_op_enable) { 2137bf215546Sopenharmony_ci dynamic->logic_op = info->cb.logic_op; 2138bf215546Sopenharmony_ci } else { 2139bf215546Sopenharmony_ci dynamic->logic_op = V_028808_ROP3_COPY; 2140bf215546Sopenharmony_ci } 2141bf215546Sopenharmony_ci } 2142bf215546Sopenharmony_ci 2143bf215546Sopenharmony_ci if (states & RADV_DYNAMIC_COLOR_WRITE_ENABLE) { 2144bf215546Sopenharmony_ci dynamic->color_write_enable = info->cb.color_write_enable; 2145bf215546Sopenharmony_ci } 2146bf215546Sopenharmony_ci 2147bf215546Sopenharmony_ci pipeline->dynamic_state.mask = states; 2148bf215546Sopenharmony_ci} 2149bf215546Sopenharmony_ci

/**
 * Derive the static rasterizer register values and the depth clamp mode of a
 * graphics pipeline from its rasterization state.
 *
 * Writes pipeline->pa_su_sc_mode_cntl, pipeline->pa_cl_clip_cntl,
 * pipeline->uses_conservative_overestimate and pipeline->depth_clamp_mode.
 * Reads pipeline->negative_one_to_one, so that field must already be set.
 *
 * \param pipeline  graphics pipeline being initialized
 * \param info      parsed pipeline create info (only info->rs is read)
 */
static void
radv_pipeline_init_raster_state(struct radv_graphics_pipeline *pipeline,
                                const struct radv_graphics_pipeline_info *info)
{
   const struct radv_device *device = pipeline->base.device;

   /* The same polygon mode is used for front and back faces, and the single
    * depth bias enable drives the front, back and "para" offset enables.
    */
   pipeline->pa_su_sc_mode_cntl =
      S_028814_FACE(info->rs.front_face) |
      S_028814_CULL_FRONT(!!(info->rs.cull_mode & VK_CULL_MODE_FRONT_BIT)) |
      S_028814_CULL_BACK(!!(info->rs.cull_mode & VK_CULL_MODE_BACK_BIT)) |
      S_028814_POLY_MODE(info->rs.polygon_mode != V_028814_X_DRAW_TRIANGLES) |
      S_028814_POLYMODE_FRONT_PTYPE(info->rs.polygon_mode) |
      S_028814_POLYMODE_BACK_PTYPE(info->rs.polygon_mode) |
      S_028814_POLY_OFFSET_FRONT_ENABLE(info->rs.depth_bias_enable) |
      S_028814_POLY_OFFSET_BACK_ENABLE(info->rs.depth_bias_enable) |
      S_028814_POLY_OFFSET_PARA_ENABLE(info->rs.depth_bias_enable) |
      S_028814_PROVOKING_VTX_LAST(info->rs.provoking_vtx_last);

   if (device->physical_device->rad_info.gfx_level >= GFX10) {
      /* It should also be set if PERPENDICULAR_ENDCAP_ENA is set. */
      pipeline->pa_su_sc_mode_cntl |=
         S_028814_KEEP_TOGETHER_ENABLE(info->rs.polygon_mode != V_028814_X_DRAW_TRIANGLES);
   }

   /* Near and far Z clipping are disabled together. */
   pipeline->pa_cl_clip_cntl =
      S_028810_DX_CLIP_SPACE_DEF(!pipeline->negative_one_to_one) |
      S_028810_ZCLIP_NEAR_DISABLE(info->rs.depth_clip_disable) |
      S_028810_ZCLIP_FAR_DISABLE(info->rs.depth_clip_disable) |
      S_028810_DX_RASTERIZATION_KILL(info->rs.discard_enable) |
      S_028810_DX_LINEAR_ATTR_CLIP_ENA(1);

   pipeline->uses_conservative_overestimate =
      info->rs.conservative_mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT;

   pipeline->depth_clamp_mode = RADV_DEPTH_CLAMP_MODE_VIEWPORT;
   if (!info->rs.depth_clamp_enable) {
      /* For optimal performance, depth clamping should always be enabled except if the
       * application disables clamping explicitly or uses depth values outside of the [0.0, 1.0]
       * range.
       */
      if (info->rs.depth_clip_disable ||
          device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
         pipeline->depth_clamp_mode = RADV_DEPTH_CLAMP_MODE_DISABLED;
      } else {
         pipeline->depth_clamp_mode = RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE;
      }
   }
}

/**
 * Build the depth/stencil register state of a graphics pipeline.
 *
 * Stores the computed DB_DEPTH_CONTROL value in pipeline->db_depth_control and
 * returns the remaining values (db_render_control, db_render_override,
 * db_render_override2) in a zero-initialized radv_depth_stencil_state.
 *
 * Reads pipeline->depth_clamp_mode, which radv_pipeline_init_raster_state()
 * assigns; this function is therefore presumably expected to run after it.
 *
 * \param pipeline  graphics pipeline being initialized
 * \param info      parsed pipeline create info (ri, ms and ds are read)
 * \return the depth/stencil state that is not stored in the pipeline here
 */
static struct radv_depth_stencil_state
radv_pipeline_init_depth_stencil_state(struct radv_graphics_pipeline *pipeline,
                                       const struct radv_graphics_pipeline_info *info)
{
   const struct radv_physical_device *pdevice = pipeline->base.device->physical_device;
   struct radv_depth_stencil_state ds_state = {0};
   uint32_t db_depth_control = 0;

   bool has_depth_attachment = info->ri.depth_att_format != VK_FORMAT_UNDEFINED;
   bool has_stencil_attachment = info->ri.stencil_att_format != VK_FORMAT_UNDEFINED;

   /* Depth test state is only programmed when a depth attachment exists. */
   if (has_depth_attachment) {
      /* from amdvlk: For 4xAA and 8xAA need to decompress on flush for better performance */
      ds_state.db_render_override2 |= S_028010_DECOMPRESS_Z_ON_FLUSH(info->ms.raster_samples > 2);

      if (pdevice->rad_info.gfx_level >= GFX10_3)
         ds_state.db_render_override2 |= S_028010_CENTROID_COMPUTATION_MODE(1);

      db_depth_control = S_028800_Z_ENABLE(info->ds.depth_test_enable) |
                         S_028800_Z_WRITE_ENABLE(info->ds.depth_write_enable) |
                         S_028800_ZFUNC(info->ds.depth_compare_op) |
                         S_028800_DEPTH_BOUNDS_ENABLE(info->ds.depth_bounds_test_enable);
   }

   /* Stencil needs both a stencil attachment and the test to be enabled. */
   if (has_stencil_attachment && info->ds.stencil_test_enable) {
      db_depth_control |= S_028800_STENCIL_ENABLE(1) | S_028800_BACKFACE_ENABLE(1);
      db_depth_control |= S_028800_STENCILFUNC(info->ds.front.compare_op);
      db_depth_control |= S_028800_STENCILFUNC_BF(info->ds.back.compare_op);
   }

   ds_state.db_render_override |= S_02800C_FORCE_HIS_ENABLE0(V_02800C_FORCE_DISABLE) |
                                  S_02800C_FORCE_HIS_ENABLE1(V_02800C_FORCE_DISABLE);

   if (pipeline->depth_clamp_mode == RADV_DEPTH_CLAMP_MODE_DISABLED)
      ds_state.db_render_override |= S_02800C_DISABLE_VIEWPORT_CLAMP(1);

   if (pdevice->rad_info.gfx_level >= GFX11) {
      unsigned max_allowed_tiles_in_wave = 0;
      unsigned num_samples = MAX2(radv_pipeline_color_samples(info),
                                  radv_pipeline_depth_samples(info));

      if (pdevice->rad_info.has_dedicated_vram) {
         if (num_samples == 8)
            max_allowed_tiles_in_wave = 7;
         else if (num_samples == 4)
            max_allowed_tiles_in_wave = 14;
      } else {
         if (num_samples == 8)
            max_allowed_tiles_in_wave = 8;
      }

      /* TODO: We may want to disable this workaround for future chips. */
      if (num_samples >= 4) {
         if (max_allowed_tiles_in_wave)
            max_allowed_tiles_in_wave--;
         else
            max_allowed_tiles_in_wave = 15;
      }

      ds_state.db_render_control |= S_028000_OREO_MODE(V_028000_OMODE_O_THEN_B) |
                                    S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave);
   }

   pipeline->db_depth_control = db_depth_control;

   return ds_state;
}

/**
 * Compute the on-chip (ES/GS merged) geometry shader subgroup parameters.
 *
 * Fills *out with the LDS size, VGT_GS_ONCHIP_CNTL, VGT_GS_MAX_PRIMS_PER_SUBGROUP
 * and ESGS ring item size, and writes the resulting workgroup size into both the
 * ES stage (TES when tessellation is present, VS otherwise) and the GS stage of
 * \p stages.
 *
 * \param key       pipeline key; key->vs.topology selects adjacency handling
 * \param pipeline  pipeline, used only to reach the physical device
 * \param stages    per-stage array indexed by gl_shader_stage (read and written)
 * \param out       receives the computed GS info
 */
static void
gfx9_get_gs_info(const struct radv_pipeline_key *key, const struct radv_pipeline *pipeline,
                 struct radv_pipeline_stage *stages, struct gfx9_gs_info *out)
{
   const struct radv_physical_device *pdevice = pipeline->device->physical_device;
   struct radv_shader_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info;
   struct radv_es_output_info *es_info;
   bool has_tess = !!stages[MESA_SHADER_TESS_CTRL].nir;

   /* On GFX9+ the ES output info is taken from the GS stage info, otherwise
    * from the separate ES stage (TES or VS).
    */
   if (pdevice->rad_info.gfx_level >= GFX9)
      es_info = has_tess ? &gs_info->tes.es_info : &gs_info->vs.es_info;
   else
      es_info = has_tess ? &stages[MESA_SHADER_TESS_EVAL].info.tes.es_info
                         : &stages[MESA_SHADER_VERTEX].info.vs.es_info;

   unsigned gs_num_invocations = MAX2(gs_info->gs.invocations, 1);
   bool uses_adjacency;
   switch (key->vs.topology) {
   case V_008958_DI_PT_LINELIST_ADJ:
   case V_008958_DI_PT_LINESTRIP_ADJ:
   case V_008958_DI_PT_TRILIST_ADJ:
   case V_008958_DI_PT_TRISTRIP_ADJ:
      uses_adjacency = true;
      break;
   default:
      uses_adjacency = false;
      break;
   }

   /* All these are in dwords: */
   /* We can't allow using the whole LDS, because GS waves compete with
    * other shader stages for LDS space. */
   const unsigned max_lds_size = 8 * 1024;
   const unsigned esgs_itemsize = es_info->esgs_itemsize / 4;
   unsigned esgs_lds_size;

   /* All these are per subgroup: */
   const unsigned max_out_prims = 32 * 1024;
   const unsigned max_es_verts = 255;
   const unsigned ideal_gs_prims = 64;
   unsigned max_gs_prims, gs_prims;
   unsigned min_es_verts, es_verts, worst_case_es_verts;

   if (uses_adjacency || gs_num_invocations > 1)
      max_gs_prims = 127 / gs_num_invocations;
   else
      max_gs_prims = 255;

   /* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations.
    * Make sure we don't go over the maximum value.
    */
   if (gs_info->gs.vertices_out > 0) {
      max_gs_prims =
         MIN2(max_gs_prims, max_out_prims / (gs_info->gs.vertices_out * gs_num_invocations));
   }
   assert(max_gs_prims > 0);

   /* If the primitive has adjacency, halve the number of vertices
    * that will be reused in multiple primitives.
    */
   min_es_verts = gs_info->gs.vertices_in / (uses_adjacency ? 2 : 1);

   gs_prims = MIN2(ideal_gs_prims, max_gs_prims);
   worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);

   /* Compute ESGS LDS size based on the worst case number of ES vertices
    * needed to create the target number of GS prims per subgroup.
    */
   esgs_lds_size = esgs_itemsize * worst_case_es_verts;

   /* If total LDS usage is too big, refactor partitions based on ratio
    * of ESGS item sizes.
    */
   if (esgs_lds_size > max_lds_size) {
      /* Our target GS Prims Per Subgroup was too large. Calculate
       * the maximum number of GS Prims Per Subgroup that will fit
       * into LDS, capped by the maximum that the hardware can support.
       */
      gs_prims = MIN2((max_lds_size / (esgs_itemsize * min_es_verts)), max_gs_prims);
      assert(gs_prims > 0);
      worst_case_es_verts = MIN2(min_es_verts * gs_prims, max_es_verts);

      esgs_lds_size = esgs_itemsize * worst_case_es_verts;
      assert(esgs_lds_size <= max_lds_size);
   }

   /* Now calculate remaining ESGS information. */
   if (esgs_lds_size)
      es_verts = MIN2(esgs_lds_size / esgs_itemsize, max_es_verts);
   else
      es_verts = max_es_verts;

   /* Vertices for adjacency primitives are not always reused, so restore
    * it for ES_VERTS_PER_SUBGRP.
    */
   min_es_verts = gs_info->gs.vertices_in;

   /* For normal primitives, the VGT only checks if they are past the ES
    * verts per subgroup after allocating a full GS primitive and if they
    * are, kick off a new subgroup. But if those additional ES verts are
    * unique (e.g. not reused) we need to make sure there is enough LDS
    * space to account for those ES verts beyond ES_VERTS_PER_SUBGRP.
    */
   es_verts -= min_es_verts - 1;

   uint32_t es_verts_per_subgroup = es_verts;
   uint32_t gs_prims_per_subgroup = gs_prims;
   uint32_t gs_inst_prims_in_subgroup = gs_prims * gs_num_invocations;
   uint32_t max_prims_per_subgroup = gs_inst_prims_in_subgroup * gs_info->gs.vertices_out;
   /* esgs_lds_size is in dwords; lds_size is stored in units of 128 dwords. */
   out->lds_size = align(esgs_lds_size, 128) / 128;
   out->vgt_gs_onchip_cntl = S_028A44_ES_VERTS_PER_SUBGRP(es_verts_per_subgroup) |
                             S_028A44_GS_PRIMS_PER_SUBGRP(gs_prims_per_subgroup) |
                             S_028A44_GS_INST_PRIMS_IN_SUBGRP(gs_inst_prims_in_subgroup);
   out->vgt_gs_max_prims_per_subgroup = S_028A94_MAX_PRIMS_PER_SUBGROUP(max_prims_per_subgroup);
   out->vgt_esgs_ring_itemsize = esgs_itemsize;
   assert(max_prims_per_subgroup <= max_out_prims);

   /* The ES stage and the GS stage get the same workgroup size. */
   gl_shader_stage es_stage = has_tess ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
   unsigned workgroup_size = ac_compute_esgs_workgroup_size(
      pdevice->rad_info.gfx_level, stages[es_stage].info.wave_size,
      es_verts_per_subgroup, gs_inst_prims_in_subgroup);
   stages[es_stage].info.workgroup_size = workgroup_size;
   stages[MESA_SHADER_GEOMETRY].info.workgroup_size = workgroup_size;
}

/**
 * Limit *max_gsprims to the number of primitives that fit in max_esverts.
 *
 * The first primitive needs min_verts_per_prim vertices; each of the remaining
 * vertices is counted as one further primitive, or as half a primitive when
 * use_adjacency is set.
 *
 * \param max_gsprims         in/out: primitive limit, only ever lowered
 * \param max_esverts         ES vertex limit
 * \param min_verts_per_prim  vertices required by one primitive
 * \param use_adjacency       whether the input topology has adjacency
 */
static void
clamp_gsprims_to_esverts(unsigned *max_gsprims, unsigned max_esverts, unsigned min_verts_per_prim,
                         bool use_adjacency)
{
   /* Saturate at zero: a plain unsigned subtraction wraps around when
    * max_esverts < min_verts_per_prim, which either disables the clamp or
    * (for a difference of -1 without adjacency) clamps *max_gsprims to 0.
    */
   unsigned max_reuse = max_esverts > min_verts_per_prim ? max_esverts - min_verts_per_prim : 0;
   if (use_adjacency)
      max_reuse /= 2;
   *max_gsprims = MIN2(*max_gsprims, 1 + max_reuse);
}

/**
 * Return the number of vertices per input primitive of the last
 * pre-rasterization stage.
 *
 * With a geometry shader this is the shader's vertices_in. With tessellation
 * it is 1 for point mode, 2 for isolines and 3 otherwise. Without either it
 * is always 3.
 *
 * \param stages  per-stage array indexed by gl_shader_stage; when a TESS_CTRL
 *                shader is present the TESS_EVAL nir pointer is dereferenced
 */
static unsigned
radv_get_num_input_vertices(const struct radv_pipeline_stage *stages)
{
   if (stages[MESA_SHADER_GEOMETRY].nir) {
      nir_shader *gs = stages[MESA_SHADER_GEOMETRY].nir;

      return gs->info.gs.vertices_in;
   }

   if (stages[MESA_SHADER_TESS_CTRL].nir) {
      nir_shader *tes = stages[MESA_SHADER_TESS_EVAL].nir;

      if (tes->info.tess.point_mode)
         return 1;
      if (tes->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES)
         return 2;
      return 3;
   }

   return 3;
}

/**
 * Emit the GE_PC_ALLOC uconfig register into a command stream.
 *
 * OVERSUB_EN is set when oversub_pc_lines is non-zero and NUM_PC_LINES is
 * programmed with oversub_pc_lines - 1.
 *
 * NOTE(review): with oversub_pc_lines == 0 the subtraction wraps to
 * UINT32_MAX; presumably S_030980_NUM_PC_LINES masks the value to the field
 * width and the field is ignored while OVERSUB_EN is 0 - confirm.
 *
 * \param cs                command stream to emit into
 * \param gfx_level         not used by this function
 * \param oversub_pc_lines  number of oversubscribed parameter cache lines
 */
static void
gfx10_emit_ge_pc_alloc(struct radeon_cmdbuf *cs, enum amd_gfx_level gfx_level,
                       uint32_t oversub_pc_lines)
{
   radeon_set_uconfig_reg(
      cs, R_030980_GE_PC_ALLOC,
      S_030980_OVERSUB_EN(oversub_pc_lines > 0) | S_030980_NUM_PC_LINES(oversub_pc_lines - 1));
}

/**
 * Fill the NGG subgroup info for a mesh shader and set its workgroup size.
 *
 * \param stage  mesh shader stage; stage->nir is read and
 *               stage->info.workgroup_size is written
 * \param ngg    receives the NGG parameters
 */
static void
gfx10_get_ngg_ms_info(struct radv_pipeline_stage *stage, struct gfx10_ngg_info *ngg)
{
   /* Special case for mesh shader workgroups.
    *
    * Mesh shaders don't have any real vertex input, but they can produce
    * an arbitrary number of vertices and primitives (up to 256).
    * We need to precisely control the number of mesh shader workgroups
    * that are launched from draw calls.
    *
    * To achieve that, we set:
    * - input primitive topology to point list
    * - input vertex and primitive count to 1
    * - max output vertex count and primitive amplification factor
    *   to the boundaries of the shader
    *
    * With that, in the draw call:
    * - drawing 1 input vertex ~ launching 1 mesh shader workgroup
    *
    * In the shader:
    * - base vertex ~ first workgroup index (firstTask in NV_mesh_shader)
    * - input vertex id ~ workgroup id (in 1D - shader needs to calculate in 3D)
    *
    * Notes:
    * - without GS_EN=1 PRIM_AMP_FACTOR and MAX_VERTS_PER_SUBGROUP don't seem to work
    * - with GS_EN=1 we must also set VGT_GS_MAX_VERT_OUT (otherwise the GPU hangs)
    * - with GS_FAST_LAUNCH=1 every lane's VGPRs are initialized to the same input vertex index
    *
    */
   nir_shader *ms = stage->nir;

   ngg->enable_vertex_grouping = true;
   ngg->esgs_ring_size = 1;
   ngg->hw_max_esverts = 1;
   ngg->max_gsprims = 1;
   ngg->max_out_verts = ms->info.mesh.max_vertices_out;
   ngg->max_vert_out_per_gs_instance = false;
   ngg->ngg_emit_size = 0;
   ngg->prim_amp_factor = ms->info.mesh.max_primitives_out;
   ngg->vgt_esgs_ring_itemsize = 1;

   unsigned min_ngg_workgroup_size =
      ac_compute_ngg_workgroup_size(ngg->hw_max_esverts, ngg->max_gsprims,
                                    ngg->max_out_verts, ngg->prim_amp_factor);

   unsigned api_workgroup_size =
      ac_compute_cs_workgroup_size(ms->info.workgroup_size, false, UINT32_MAX);

   /* Use whichever is larger: what NGG requires or what the API shader declares. */
   stage->info.workgroup_size = MAX2(min_ngg_workgroup_size, api_workgroup_size);
}

/**
 * Compute the NGG subgroup limits for a VS, TES or GS pipeline.
 *
 * Determines the maximum number of ES vertices and GS primitives per subgroup
 * under the LDS budget and hardware minimums, stores the results in *ngg, and
 * writes the resulting workgroup size into both the GS stage and the ES stage
 * (TES when tessellation is present, VS otherwise) of \p stages.
 *
 * \param key       pipeline key; key->vs.topology selects adjacency handling
 * \param pipeline  pipeline, used only to reach the physical device
 * \param stages    per-stage array indexed by gl_shader_stage (read and written)
 * \param ngg       receives the computed NGG info
 */
static void
gfx10_get_ngg_info(const struct radv_pipeline_key *key, struct radv_pipeline *pipeline,
                   struct radv_pipeline_stage *stages, struct gfx10_ngg_info *ngg)
{
   const struct radv_physical_device *pdevice = pipeline->device->physical_device;
   struct radv_shader_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info;
   struct radv_es_output_info *es_info =
      stages[MESA_SHADER_TESS_CTRL].nir ? &gs_info->tes.es_info : &gs_info->vs.es_info;
   unsigned gs_type = stages[MESA_SHADER_GEOMETRY].nir ? MESA_SHADER_GEOMETRY : MESA_SHADER_VERTEX;
   unsigned max_verts_per_prim = radv_get_num_input_vertices(stages);
   unsigned min_verts_per_prim = gs_type == MESA_SHADER_GEOMETRY ? max_verts_per_prim : 1;
   unsigned gs_num_invocations = stages[MESA_SHADER_GEOMETRY].nir ? MAX2(gs_info->gs.invocations, 1) : 1;
   bool uses_adjacency;
   switch (key->vs.topology) {
   case V_008958_DI_PT_LINELIST_ADJ:
   case V_008958_DI_PT_LINESTRIP_ADJ:
   case V_008958_DI_PT_TRILIST_ADJ:
   case V_008958_DI_PT_TRISTRIP_ADJ:
      uses_adjacency = true;
      break;
   default:
      uses_adjacency = false;
      break;
   }

   /* All these are in dwords: */
   /* We can't allow using the whole LDS, because GS waves compete with
    * other shader stages for LDS space.
    *
    * TODO: We should really take the shader's internal LDS use into
    *       account. The linker will fail if the size is greater than
    *       8K dwords.
    */
   const unsigned max_lds_size = 8 * 1024 - 768;
   const unsigned target_lds_size = max_lds_size;
   unsigned esvert_lds_size = 0;
   unsigned gsprim_lds_size = 0;

   /* All these are per subgroup: */
   const unsigned min_esverts = pdevice->rad_info.gfx_level >= GFX10_3 ? 29 : 24;
   bool max_vert_out_per_gs_instance = false;
   unsigned max_esverts_base = 128;
   unsigned max_gsprims_base = 128; /* default prim group size clamp */

   /* Hardware has the following non-natural restrictions on the value
    * of GE_CNTL.VERT_GRP_SIZE based on the primitive type of
    * the draw:
    *  - at most 252 for any line input primitive type
    *  - at most 251 for any quad input primitive type
    *  - at most 251 for triangle strips with adjacency (this happens to
    *    be the natural limit for triangle *lists* with adjacency)
    */
   max_esverts_base = MIN2(max_esverts_base, 251 + max_verts_per_prim - 1);

   if (gs_type == MESA_SHADER_GEOMETRY) {
      unsigned max_out_verts_per_gsprim = gs_info->gs.vertices_out * gs_num_invocations;

      if (max_out_verts_per_gsprim <= 256) {
         if (max_out_verts_per_gsprim) {
            max_gsprims_base = MIN2(max_gsprims_base, 256 / max_out_verts_per_gsprim);
         }
      } else {
         /* Use special multi-cycling mode in which each GS
          * instance gets its own subgroup. Does not work with
          * tessellation. */
         max_vert_out_per_gs_instance = true;
         max_gsprims_base = 1;
         max_out_verts_per_gsprim = gs_info->gs.vertices_out;
      }

      esvert_lds_size = es_info->esgs_itemsize / 4;
      gsprim_lds_size = (gs_info->gs.gsvs_vertex_size / 4 + 1) * max_out_verts_per_gsprim;
   } else {
      /* VS and TES. */
      /* LDS size for passing data from GS to ES. */
      struct radv_streamout_info *so_info = stages[MESA_SHADER_TESS_CTRL].nir
                                               ? &stages[MESA_SHADER_TESS_EVAL].info.so
                                               : &stages[MESA_SHADER_VERTEX].info.so;

      if (so_info->num_outputs)
         esvert_lds_size = 4 * so_info->num_outputs + 1;

      /* GS stores Primitive IDs (one DWORD) into LDS at the address
       * corresponding to the ES thread of the provoking vertex. All
       * ES threads load and export PrimitiveID for their thread.
       */
      if (!stages[MESA_SHADER_TESS_CTRL].nir && stages[MESA_SHADER_VERTEX].info.vs.outinfo.export_prim_id)
         esvert_lds_size = MAX2(esvert_lds_size, 1);
   }

   unsigned max_gsprims = max_gsprims_base;
   unsigned max_esverts = max_esverts_base;

   if (esvert_lds_size)
      max_esverts = MIN2(max_esverts, target_lds_size / esvert_lds_size);
   if (gsprim_lds_size)
      max_gsprims = MIN2(max_gsprims, target_lds_size / gsprim_lds_size);

   max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
   clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
   assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);

   if (esvert_lds_size || gsprim_lds_size) {
      /* Now that we have a rough proportionality between esverts
       * and gsprims based on the primitive type, scale both of them
       * down simultaneously based on required LDS space.
       *
       * We could be smarter about this if we knew how much vertex
       * reuse to expect.
       */
      unsigned lds_total = max_esverts * esvert_lds_size + max_gsprims * gsprim_lds_size;
      if (lds_total > target_lds_size) {
         max_esverts = max_esverts * target_lds_size / lds_total;
         max_gsprims = max_gsprims * target_lds_size / lds_total;

         max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
         clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
         assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
      }
   }

   /* Round up towards full wave sizes for better ALU utilization. */
   if (!max_vert_out_per_gs_instance) {
      unsigned orig_max_esverts;
      unsigned orig_max_gsprims;
      unsigned wavesize;

      if (gs_type == MESA_SHADER_GEOMETRY) {
         wavesize = gs_info->wave_size;
      } else {
         wavesize = stages[MESA_SHADER_TESS_CTRL].nir ? stages[MESA_SHADER_TESS_EVAL].info.wave_size
                                                      : stages[MESA_SHADER_VERTEX].info.wave_size;
      }

      /* Repeat until neither limit changes during a full pass. */
      do {
         orig_max_esverts = max_esverts;
         orig_max_gsprims = max_gsprims;

         max_esverts = align(max_esverts, wavesize);
         max_esverts = MIN2(max_esverts, max_esverts_base);
         if (esvert_lds_size)
            max_esverts =
               MIN2(max_esverts, (max_lds_size - max_gsprims * gsprim_lds_size) / esvert_lds_size);
         max_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);

         /* Hardware restriction: minimum value of max_esverts */
         if (pdevice->rad_info.gfx_level == GFX10)
            max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
         else
            max_esverts = MAX2(max_esverts, min_esverts);

         max_gsprims = align(max_gsprims, wavesize);
         max_gsprims = MIN2(max_gsprims, max_gsprims_base);
         if (gsprim_lds_size) {
            /* Don't count unusable vertices to the LDS
             * size. Those are vertices above the maximum
             * number of vertices that can occur in the
             * workgroup, which is e.g. max_gsprims * 3
             * for triangles.
             */
            unsigned usable_esverts = MIN2(max_esverts, max_gsprims * max_verts_per_prim);
            max_gsprims = MIN2(max_gsprims,
                               (max_lds_size - usable_esverts * esvert_lds_size) / gsprim_lds_size);
         }
         clamp_gsprims_to_esverts(&max_gsprims, max_esverts, min_verts_per_prim, uses_adjacency);
         assert(max_esverts >= max_verts_per_prim && max_gsprims >= 1);
      } while (orig_max_esverts != max_esverts || orig_max_gsprims != max_gsprims);

      /* Verify the restriction. */
      if (pdevice->rad_info.gfx_level == GFX10)
         assert(max_esverts >= min_esverts - 1 + max_verts_per_prim);
      else
         assert(max_esverts >= min_esverts);
   } else {
      /* Hardware restriction: minimum value of max_esverts */
      if (pdevice->rad_info.gfx_level == GFX10)
         max_esverts = MAX2(max_esverts, min_esverts - 1 + max_verts_per_prim);
      else
         max_esverts = MAX2(max_esverts, min_esverts);
   }

   unsigned max_out_vertices = max_vert_out_per_gs_instance ? gs_info->gs.vertices_out
                               : gs_type == MESA_SHADER_GEOMETRY
                                  ? max_gsprims * gs_num_invocations * gs_info->gs.vertices_out
                                  : max_esverts;
   assert(max_out_vertices <= 256);

   unsigned prim_amp_factor = 1;
   if (gs_type == MESA_SHADER_GEOMETRY) {
      /* Number of output primitives per GS input primitive after
       * GS instancing. */
      prim_amp_factor = gs_info->gs.vertices_out;
   }

   /* On Gfx10, the GE only checks against the maximum number of ES verts
    * after allocating a full GS primitive. So we need to ensure that
    * whenever this check passes, there is enough space for a full
    * primitive without vertex reuse.
    */
   if (pdevice->rad_info.gfx_level == GFX10)
      ngg->hw_max_esverts = max_esverts - max_verts_per_prim + 1;
   else
      ngg->hw_max_esverts = max_esverts;

   ngg->max_gsprims = max_gsprims;
   ngg->max_out_verts = max_out_vertices;
   ngg->prim_amp_factor = prim_amp_factor;
   ngg->max_vert_out_per_gs_instance = max_vert_out_per_gs_instance;
   ngg->ngg_emit_size = max_gsprims * gsprim_lds_size;
   ngg->enable_vertex_grouping = true;

   /* Don't count unusable vertices. */
   ngg->esgs_ring_size = MIN2(max_esverts, max_gsprims * max_verts_per_prim) * esvert_lds_size * 4;

   if (gs_type == MESA_SHADER_GEOMETRY) {
      ngg->vgt_esgs_ring_itemsize = es_info->esgs_itemsize / 4;
   } else {
      ngg->vgt_esgs_ring_itemsize = 1;
   }

   assert(ngg->hw_max_esverts >= min_esverts); /* HW limitation */

   /* The GS stage and the ES stage get the same workgroup size. */
   gl_shader_stage es_stage = stages[MESA_SHADER_TESS_CTRL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX;
   unsigned workgroup_size =
      ac_compute_ngg_workgroup_size(
         max_esverts, max_gsprims * gs_num_invocations, max_out_vertices, prim_amp_factor);
   stages[MESA_SHADER_GEOMETRY].info.workgroup_size = workgroup_size;
   stages[es_stage].info.workgroup_size = workgroup_size;
}
2713bf215546Sopenharmony_ci 2714bf215546Sopenharmony_cistatic void 2715bf215546Sopenharmony_ciradv_pipeline_init_gs_ring_state(struct radv_graphics_pipeline *pipeline, const struct gfx9_gs_info *gs) 2716bf215546Sopenharmony_ci{ 2717bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 2718bf215546Sopenharmony_ci unsigned num_se = pdevice->rad_info.max_se; 2719bf215546Sopenharmony_ci unsigned wave_size = 64; 2720bf215546Sopenharmony_ci unsigned max_gs_waves = 32 * num_se; /* max 32 per SE on GCN */ 2721bf215546Sopenharmony_ci /* On GFX6-GFX7, the value comes from VGT_GS_VERTEX_REUSE = 16. 2722bf215546Sopenharmony_ci * On GFX8+, the value comes from VGT_VERTEX_REUSE_BLOCK_CNTL = 30 (+2).
2723bf215546Sopenharmony_ci */ 2724bf215546Sopenharmony_ci unsigned gs_vertex_reuse = (pdevice->rad_info.gfx_level >= GFX8 ? 32 : 16) * num_se; 2725bf215546Sopenharmony_ci unsigned alignment = 256 * num_se; 2726bf215546Sopenharmony_ci /* The maximum size is 63.999 MB per SE. */ 2727bf215546Sopenharmony_ci unsigned max_size = ((unsigned)(63.999 * 1024 * 1024) & ~255) * num_se; 2728bf215546Sopenharmony_ci struct radv_shader_info *gs_info = &pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info; 2729bf215546Sopenharmony_ci 2730bf215546Sopenharmony_ci /* Calculate the minimum size. */ 2731bf215546Sopenharmony_ci unsigned min_esgs_ring_size = 2732bf215546Sopenharmony_ci align(gs->vgt_esgs_ring_itemsize * 4 * gs_vertex_reuse * wave_size, alignment); 2733bf215546Sopenharmony_ci /* These are recommended sizes, not minimum sizes. */ 2734bf215546Sopenharmony_ci unsigned esgs_ring_size = 2735bf215546Sopenharmony_ci max_gs_waves * 2 * wave_size * gs->vgt_esgs_ring_itemsize * 4 * gs_info->gs.vertices_in; 2736bf215546Sopenharmony_ci unsigned gsvs_ring_size = max_gs_waves * 2 * wave_size * gs_info->gs.max_gsvs_emit_size; 2737bf215546Sopenharmony_ci 2738bf215546Sopenharmony_ci min_esgs_ring_size = align(min_esgs_ring_size, alignment); 2739bf215546Sopenharmony_ci esgs_ring_size = align(esgs_ring_size, alignment); 2740bf215546Sopenharmony_ci gsvs_ring_size = align(gsvs_ring_size, alignment); 2741bf215546Sopenharmony_ci 2742bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level <= GFX8) 2743bf215546Sopenharmony_ci pipeline->esgs_ring_size = CLAMP(esgs_ring_size, min_esgs_ring_size, max_size); 2744bf215546Sopenharmony_ci 2745bf215546Sopenharmony_ci pipeline->gsvs_ring_size = MIN2(gsvs_ring_size, max_size); 2746bf215546Sopenharmony_ci} 2747bf215546Sopenharmony_ci 2748bf215546Sopenharmony_cistruct radv_shader * 2749bf215546Sopenharmony_ciradv_get_shader(const struct radv_pipeline *pipeline, gl_shader_stage stage) 2750bf215546Sopenharmony_ci{ 2751bf215546Sopenharmony_ci if (stage == 
MESA_SHADER_VERTEX) { 2752bf215546Sopenharmony_ci if (pipeline->shaders[MESA_SHADER_VERTEX]) 2753bf215546Sopenharmony_ci return pipeline->shaders[MESA_SHADER_VERTEX]; 2754bf215546Sopenharmony_ci if (pipeline->shaders[MESA_SHADER_TESS_CTRL]) 2755bf215546Sopenharmony_ci return pipeline->shaders[MESA_SHADER_TESS_CTRL]; 2756bf215546Sopenharmony_ci if (pipeline->shaders[MESA_SHADER_GEOMETRY]) 2757bf215546Sopenharmony_ci return pipeline->shaders[MESA_SHADER_GEOMETRY]; 2758bf215546Sopenharmony_ci } else if (stage == MESA_SHADER_TESS_EVAL) { 2759bf215546Sopenharmony_ci if (!pipeline->shaders[MESA_SHADER_TESS_CTRL]) 2760bf215546Sopenharmony_ci return NULL; 2761bf215546Sopenharmony_ci if (pipeline->shaders[MESA_SHADER_TESS_EVAL]) 2762bf215546Sopenharmony_ci return pipeline->shaders[MESA_SHADER_TESS_EVAL]; 2763bf215546Sopenharmony_ci if (pipeline->shaders[MESA_SHADER_GEOMETRY]) 2764bf215546Sopenharmony_ci return pipeline->shaders[MESA_SHADER_GEOMETRY]; 2765bf215546Sopenharmony_ci } 2766bf215546Sopenharmony_ci return pipeline->shaders[stage]; 2767bf215546Sopenharmony_ci} 2768bf215546Sopenharmony_ci 2769bf215546Sopenharmony_cistatic const struct radv_vs_output_info * 2770bf215546Sopenharmony_ciget_vs_output_info(const struct radv_graphics_pipeline *pipeline) 2771bf215546Sopenharmony_ci{ 2772bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) 2773bf215546Sopenharmony_ci if (radv_pipeline_has_ngg(pipeline)) 2774bf215546Sopenharmony_ci return &pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.vs.outinfo; 2775bf215546Sopenharmony_ci else 2776bf215546Sopenharmony_ci return &pipeline->base.gs_copy_shader->info.vs.outinfo; 2777bf215546Sopenharmony_ci else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) 2778bf215546Sopenharmony_ci return &pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes.outinfo; 2779bf215546Sopenharmony_ci else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) 2780bf215546Sopenharmony_ci return 
&pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.outinfo; 2781bf215546Sopenharmony_ci else 2782bf215546Sopenharmony_ci return &pipeline->base.shaders[MESA_SHADER_VERTEX]->info.vs.outinfo; 2783bf215546Sopenharmony_ci} 2784bf215546Sopenharmony_ci 2785bf215546Sopenharmony_cistatic bool 2786bf215546Sopenharmony_ciradv_lower_viewport_to_zero(nir_shader *nir) 2787bf215546Sopenharmony_ci{ 2788bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 2789bf215546Sopenharmony_ci bool progress = false; 2790bf215546Sopenharmony_ci 2791bf215546Sopenharmony_ci nir_builder b; 2792bf215546Sopenharmony_ci nir_builder_init(&b, impl); 2793bf215546Sopenharmony_ci 2794bf215546Sopenharmony_ci /* There should be only one deref load for VIEWPORT after lower_io_to_temporaries. */ 2795bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 2796bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 2797bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 2798bf215546Sopenharmony_ci continue; 2799bf215546Sopenharmony_ci 2800bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 2801bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_deref) 2802bf215546Sopenharmony_ci continue; 2803bf215546Sopenharmony_ci 2804bf215546Sopenharmony_ci nir_variable *var = nir_intrinsic_get_var(intr, 0); 2805bf215546Sopenharmony_ci if (var->data.mode != nir_var_shader_in || 2806bf215546Sopenharmony_ci var->data.location != VARYING_SLOT_VIEWPORT) 2807bf215546Sopenharmony_ci continue; 2808bf215546Sopenharmony_ci 2809bf215546Sopenharmony_ci b.cursor = nir_before_instr(instr); 2810bf215546Sopenharmony_ci 2811bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_imm_zero(&b, 1, 32)); 2812bf215546Sopenharmony_ci progress = true; 2813bf215546Sopenharmony_ci break; 2814bf215546Sopenharmony_ci } 2815bf215546Sopenharmony_ci if (progress) 2816bf215546Sopenharmony_ci break; 2817bf215546Sopenharmony_ci } 
2818bf215546Sopenharmony_ci 2819bf215546Sopenharmony_ci if (progress) 2820bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); 2821bf215546Sopenharmony_ci else 2822bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 2823bf215546Sopenharmony_ci 2824bf215546Sopenharmony_ci return progress; 2825bf215546Sopenharmony_ci} 2826bf215546Sopenharmony_ci 2827bf215546Sopenharmony_cistatic nir_variable * 2828bf215546Sopenharmony_cifind_layer_out_var(nir_shader *nir) 2829bf215546Sopenharmony_ci{ 2830bf215546Sopenharmony_ci nir_variable *var = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_LAYER); 2831bf215546Sopenharmony_ci if (var != NULL) 2832bf215546Sopenharmony_ci return var; 2833bf215546Sopenharmony_ci 2834bf215546Sopenharmony_ci var = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), "layer id"); 2835bf215546Sopenharmony_ci var->data.location = VARYING_SLOT_LAYER; 2836bf215546Sopenharmony_ci var->data.interpolation = INTERP_MODE_NONE; 2837bf215546Sopenharmony_ci 2838bf215546Sopenharmony_ci return var; 2839bf215546Sopenharmony_ci} 2840bf215546Sopenharmony_ci 2841bf215546Sopenharmony_cistatic bool 2842bf215546Sopenharmony_ciradv_lower_multiview(nir_shader *nir) 2843bf215546Sopenharmony_ci{ 2844bf215546Sopenharmony_ci /* This pass is not suitable for mesh shaders, because it can't know 2845bf215546Sopenharmony_ci * the mapping between API mesh shader invocations and output primitives. 2846bf215546Sopenharmony_ci * Needs to be handled in ac_nir_lower_ngg. 
2847bf215546Sopenharmony_ci */ 2848bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_MESH) 2849bf215546Sopenharmony_ci return false; 2850bf215546Sopenharmony_ci 2851bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 2852bf215546Sopenharmony_ci bool progress = false; 2853bf215546Sopenharmony_ci 2854bf215546Sopenharmony_ci nir_builder b; 2855bf215546Sopenharmony_ci nir_builder_init(&b, impl); 2856bf215546Sopenharmony_ci 2857bf215546Sopenharmony_ci /* Iterate in reverse order since there should be only one deref store to POS after 2858bf215546Sopenharmony_ci * lower_io_to_temporaries for vertex shaders and inject the layer there. For geometry shaders, 2859bf215546Sopenharmony_ci * the layer is injected right before every emit_vertex_with_counter. 2860bf215546Sopenharmony_ci */ 2861bf215546Sopenharmony_ci nir_variable *layer = NULL; 2862bf215546Sopenharmony_ci nir_foreach_block_reverse(block, impl) { 2863bf215546Sopenharmony_ci nir_foreach_instr_reverse(instr, block) { 2864bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 2865bf215546Sopenharmony_ci continue; 2866bf215546Sopenharmony_ci 2867bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_GEOMETRY) { 2868bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 2869bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_emit_vertex_with_counter) 2870bf215546Sopenharmony_ci continue; 2871bf215546Sopenharmony_ci 2872bf215546Sopenharmony_ci b.cursor = nir_before_instr(instr); 2873bf215546Sopenharmony_ci } else { 2874bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 2875bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_store_deref) 2876bf215546Sopenharmony_ci continue; 2877bf215546Sopenharmony_ci 2878bf215546Sopenharmony_ci nir_variable *var = nir_intrinsic_get_var(intr, 0); 2879bf215546Sopenharmony_ci if (var->data.mode != nir_var_shader_out || var->data.location != 
VARYING_SLOT_POS) 2880bf215546Sopenharmony_ci continue; 2881bf215546Sopenharmony_ci 2882bf215546Sopenharmony_ci b.cursor = nir_after_instr(instr); 2883bf215546Sopenharmony_ci } 2884bf215546Sopenharmony_ci 2885bf215546Sopenharmony_ci if (!layer) 2886bf215546Sopenharmony_ci layer = find_layer_out_var(nir); 2887bf215546Sopenharmony_ci 2888bf215546Sopenharmony_ci nir_store_var(&b, layer, nir_load_view_index(&b), 1); 2889bf215546Sopenharmony_ci 2890bf215546Sopenharmony_ci /* Update outputs_written to reflect that the pass added a new output. */ 2891bf215546Sopenharmony_ci nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_LAYER); 2892bf215546Sopenharmony_ci 2893bf215546Sopenharmony_ci progress = true; 2894bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_VERTEX) 2895bf215546Sopenharmony_ci break; 2896bf215546Sopenharmony_ci } 2897bf215546Sopenharmony_ci if (nir->info.stage == MESA_SHADER_VERTEX && progress) 2898bf215546Sopenharmony_ci break; 2899bf215546Sopenharmony_ci } 2900bf215546Sopenharmony_ci 2901bf215546Sopenharmony_ci if (progress) 2902bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); 2903bf215546Sopenharmony_ci else 2904bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 2905bf215546Sopenharmony_ci 2906bf215546Sopenharmony_ci return progress; 2907bf215546Sopenharmony_ci} 2908bf215546Sopenharmony_ci 2909bf215546Sopenharmony_cistatic bool 2910bf215546Sopenharmony_ciradv_export_implicit_primitive_id(nir_shader *nir) 2911bf215546Sopenharmony_ci{ 2912bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 2913bf215546Sopenharmony_ci nir_builder b; 2914bf215546Sopenharmony_ci nir_builder_init(&b, impl); 2915bf215546Sopenharmony_ci 2916bf215546Sopenharmony_ci b.cursor = nir_after_cf_list(&impl->body); 2917bf215546Sopenharmony_ci 2918bf215546Sopenharmony_ci nir_variable *var = nir_variable_create(nir, nir_var_shader_out, glsl_int_type(), NULL); 
2919bf215546Sopenharmony_ci var->data.location = VARYING_SLOT_PRIMITIVE_ID; 2920bf215546Sopenharmony_ci var->data.interpolation = INTERP_MODE_NONE; 2921bf215546Sopenharmony_ci 2922bf215546Sopenharmony_ci nir_store_var(&b, var, nir_load_primitive_id(&b), 1); 2923bf215546Sopenharmony_ci 2924bf215546Sopenharmony_ci /* Update outputs_written to reflect that the pass added a new output. */ 2925bf215546Sopenharmony_ci nir->info.outputs_written |= BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID); 2926bf215546Sopenharmony_ci 2927bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); 2928bf215546Sopenharmony_ci 2929bf215546Sopenharmony_ci return true; 2930bf215546Sopenharmony_ci} 2931bf215546Sopenharmony_ci 2932bf215546Sopenharmony_cistatic void 2933bf215546Sopenharmony_ciradv_link_shaders(struct radv_pipeline *pipeline, 2934bf215546Sopenharmony_ci const struct radv_pipeline_key *pipeline_key, 2935bf215546Sopenharmony_ci const struct radv_pipeline_stage *stages, 2936bf215546Sopenharmony_ci bool optimize_conservatively, 2937bf215546Sopenharmony_ci gl_shader_stage last_vgt_api_stage) 2938bf215546Sopenharmony_ci{ 2939bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->device->physical_device; 2940bf215546Sopenharmony_ci nir_shader *ordered_shaders[MESA_VULKAN_SHADER_STAGES]; 2941bf215546Sopenharmony_ci int shader_count = 0; 2942bf215546Sopenharmony_ci 2943bf215546Sopenharmony_ci if (stages[MESA_SHADER_FRAGMENT].nir) { 2944bf215546Sopenharmony_ci ordered_shaders[shader_count++] = stages[MESA_SHADER_FRAGMENT].nir; 2945bf215546Sopenharmony_ci } 2946bf215546Sopenharmony_ci if (stages[MESA_SHADER_GEOMETRY].nir) { 2947bf215546Sopenharmony_ci ordered_shaders[shader_count++] = stages[MESA_SHADER_GEOMETRY].nir; 2948bf215546Sopenharmony_ci } 2949bf215546Sopenharmony_ci if (stages[MESA_SHADER_TESS_EVAL].nir) { 2950bf215546Sopenharmony_ci ordered_shaders[shader_count++] = stages[MESA_SHADER_TESS_EVAL].nir; 
2951bf215546Sopenharmony_ci } 2952bf215546Sopenharmony_ci if (stages[MESA_SHADER_TESS_CTRL].nir) { 2953bf215546Sopenharmony_ci ordered_shaders[shader_count++] = stages[MESA_SHADER_TESS_CTRL].nir; 2954bf215546Sopenharmony_ci } 2955bf215546Sopenharmony_ci if (stages[MESA_SHADER_VERTEX].nir) { 2956bf215546Sopenharmony_ci ordered_shaders[shader_count++] = stages[MESA_SHADER_VERTEX].nir; 2957bf215546Sopenharmony_ci } 2958bf215546Sopenharmony_ci if (stages[MESA_SHADER_MESH].nir) { 2959bf215546Sopenharmony_ci ordered_shaders[shader_count++] = stages[MESA_SHADER_MESH].nir; 2960bf215546Sopenharmony_ci } 2961bf215546Sopenharmony_ci if (stages[MESA_SHADER_TASK].nir) { 2962bf215546Sopenharmony_ci ordered_shaders[shader_count++] = stages[MESA_SHADER_TASK].nir; 2963bf215546Sopenharmony_ci } 2964bf215546Sopenharmony_ci if (stages[MESA_SHADER_COMPUTE].nir) { 2965bf215546Sopenharmony_ci ordered_shaders[shader_count++] = stages[MESA_SHADER_COMPUTE].nir; 2966bf215546Sopenharmony_ci } 2967bf215546Sopenharmony_ci 2968bf215546Sopenharmony_ci if (stages[MESA_SHADER_MESH].nir && stages[MESA_SHADER_FRAGMENT].nir) { 2969bf215546Sopenharmony_ci nir_shader *ps = stages[MESA_SHADER_FRAGMENT].nir; 2970bf215546Sopenharmony_ci 2971bf215546Sopenharmony_ci nir_foreach_shader_in_variable(var, ps) { 2972bf215546Sopenharmony_ci /* These variables are per-primitive when used with a mesh shader. 
*/ 2973bf215546Sopenharmony_ci if (var->data.location == VARYING_SLOT_PRIMITIVE_ID || 2974bf215546Sopenharmony_ci var->data.location == VARYING_SLOT_VIEWPORT || 2975bf215546Sopenharmony_ci var->data.location == VARYING_SLOT_LAYER) 2976bf215546Sopenharmony_ci var->data.per_primitive = true; 2977bf215546Sopenharmony_ci } 2978bf215546Sopenharmony_ci } 2979bf215546Sopenharmony_ci 2980bf215546Sopenharmony_ci bool has_geom_tess = stages[MESA_SHADER_GEOMETRY].nir || stages[MESA_SHADER_TESS_CTRL].nir; 2981bf215546Sopenharmony_ci bool merged_gs = stages[MESA_SHADER_GEOMETRY].nir && pdevice->rad_info.gfx_level >= GFX9; 2982bf215546Sopenharmony_ci 2983bf215546Sopenharmony_ci if (!optimize_conservatively && shader_count > 1) { 2984bf215546Sopenharmony_ci unsigned first = ordered_shaders[shader_count - 1]->info.stage; 2985bf215546Sopenharmony_ci unsigned last = ordered_shaders[0]->info.stage; 2986bf215546Sopenharmony_ci 2987bf215546Sopenharmony_ci if (ordered_shaders[0]->info.stage == MESA_SHADER_FRAGMENT && 2988bf215546Sopenharmony_ci ordered_shaders[1]->info.has_transform_feedback_varyings) 2989bf215546Sopenharmony_ci nir_link_xfb_varyings(ordered_shaders[1], ordered_shaders[0]); 2990bf215546Sopenharmony_ci 2991bf215546Sopenharmony_ci for (int i = 1; i < shader_count; ++i) { 2992bf215546Sopenharmony_ci nir_lower_io_arrays_to_elements(ordered_shaders[i], ordered_shaders[i - 1]); 2993bf215546Sopenharmony_ci nir_validate_shader(ordered_shaders[i], "after nir_lower_io_arrays_to_elements"); 2994bf215546Sopenharmony_ci nir_validate_shader(ordered_shaders[i - 1], "after nir_lower_io_arrays_to_elements"); 2995bf215546Sopenharmony_ci } 2996bf215546Sopenharmony_ci 2997bf215546Sopenharmony_ci for (int i = 0; i < shader_count; ++i) { 2998bf215546Sopenharmony_ci nir_variable_mode mask = 0; 2999bf215546Sopenharmony_ci 3000bf215546Sopenharmony_ci if (ordered_shaders[i]->info.stage != first) 3001bf215546Sopenharmony_ci mask = mask | nir_var_shader_in; 3002bf215546Sopenharmony_ci 
3003bf215546Sopenharmony_ci if (ordered_shaders[i]->info.stage != last) 3004bf215546Sopenharmony_ci mask = mask | nir_var_shader_out; 3005bf215546Sopenharmony_ci 3006bf215546Sopenharmony_ci bool progress = false; 3007bf215546Sopenharmony_ci NIR_PASS(progress, ordered_shaders[i], nir_lower_io_to_scalar_early, mask); 3008bf215546Sopenharmony_ci if (progress) { 3009bf215546Sopenharmony_ci /* Optimize the new vector code and then remove dead vars */ 3010bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_copy_prop); 3011bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_opt_shrink_vectors); 3012bf215546Sopenharmony_ci 3013bf215546Sopenharmony_ci if (ordered_shaders[i]->info.stage != last) { 3014bf215546Sopenharmony_ci /* Optimize swizzled movs of load_const for 3015bf215546Sopenharmony_ci * nir_link_opt_varyings's constant propagation 3016bf215546Sopenharmony_ci */ 3017bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_opt_constant_folding); 3018bf215546Sopenharmony_ci /* For nir_link_opt_varyings's duplicate input opt */ 3019bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_opt_cse); 3020bf215546Sopenharmony_ci } 3021bf215546Sopenharmony_ci 3022bf215546Sopenharmony_ci /* Run copy-propagation to help remove dead 3023bf215546Sopenharmony_ci * output variables (some shaders have useless 3024bf215546Sopenharmony_ci * copies to/from an output), so compaction 3025bf215546Sopenharmony_ci * later will be more effective. 3026bf215546Sopenharmony_ci * 3027bf215546Sopenharmony_ci * This will have been done earlier but it might 3028bf215546Sopenharmony_ci * not have worked because the outputs were vector. 
3029bf215546Sopenharmony_ci */ 3030bf215546Sopenharmony_ci if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) 3031bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_opt_copy_prop_vars); 3032bf215546Sopenharmony_ci 3033bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_opt_dce); 3034bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_remove_dead_variables, 3035bf215546Sopenharmony_ci nir_var_function_temp | nir_var_shader_in | nir_var_shader_out, NULL); 3036bf215546Sopenharmony_ci } 3037bf215546Sopenharmony_ci } 3038bf215546Sopenharmony_ci } 3039bf215546Sopenharmony_ci 3040bf215546Sopenharmony_ci /* Export the primitive ID when VS or TES don't export it because it's implicit, while it's 3041bf215546Sopenharmony_ci * required for GS or MS. The primitive ID is added during lowering for NGG. 3042bf215546Sopenharmony_ci */ 3043bf215546Sopenharmony_ci if (stages[MESA_SHADER_FRAGMENT].nir && 3044bf215546Sopenharmony_ci (stages[MESA_SHADER_FRAGMENT].nir->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) && 3045bf215546Sopenharmony_ci !(stages[last_vgt_api_stage].nir->info.outputs_written & VARYING_BIT_PRIMITIVE_ID) && 3046bf215546Sopenharmony_ci ((last_vgt_api_stage == MESA_SHADER_VERTEX && !stages[MESA_SHADER_VERTEX].info.is_ngg) || 3047bf215546Sopenharmony_ci (last_vgt_api_stage == MESA_SHADER_TESS_EVAL && !stages[MESA_SHADER_TESS_EVAL].info.is_ngg))) { 3048bf215546Sopenharmony_ci radv_export_implicit_primitive_id(stages[last_vgt_api_stage].nir); 3049bf215546Sopenharmony_ci } 3050bf215546Sopenharmony_ci 3051bf215546Sopenharmony_ci if (!optimize_conservatively) { 3052bf215546Sopenharmony_ci bool uses_xfb = last_vgt_api_stage != -1 && 3053bf215546Sopenharmony_ci stages[last_vgt_api_stage].nir->xfb_info; 3054bf215546Sopenharmony_ci 3055bf215546Sopenharmony_ci for (unsigned i = 0; i < shader_count; ++i) { 3056bf215546Sopenharmony_ci shader_info *info = &ordered_shaders[i]->info; 3057bf215546Sopenharmony_ci 3058bf215546Sopenharmony_ci /* Remove 
exports without color attachment or writemask. */ 3059bf215546Sopenharmony_ci if (info->stage == MESA_SHADER_FRAGMENT) { 3060bf215546Sopenharmony_ci bool fixup_derefs = false; 3061bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, ordered_shaders[i], nir_var_shader_out) { 3062bf215546Sopenharmony_ci int idx = var->data.location; 3063bf215546Sopenharmony_ci idx -= FRAG_RESULT_DATA0; 3064bf215546Sopenharmony_ci if (idx < 0) 3065bf215546Sopenharmony_ci continue; 3066bf215546Sopenharmony_ci 3067bf215546Sopenharmony_ci unsigned col_format = (pipeline_key->ps.col_format >> (4 * idx)) & 0xf; 3068bf215546Sopenharmony_ci unsigned cb_target_mask = (pipeline_key->ps.cb_target_mask >> (4 * idx)) & 0xf; 3069bf215546Sopenharmony_ci 3070bf215546Sopenharmony_ci if (col_format == V_028714_SPI_SHADER_ZERO || 3071bf215546Sopenharmony_ci (col_format == V_028714_SPI_SHADER_32_R && !cb_target_mask && 3072bf215546Sopenharmony_ci !pipeline_key->ps.mrt0_is_dual_src)) { 3073bf215546Sopenharmony_ci /* Remove the color export if it's unused or in presence of holes. */ 3074bf215546Sopenharmony_ci info->outputs_written &= ~BITFIELD64_BIT(var->data.location); 3075bf215546Sopenharmony_ci var->data.location = 0; 3076bf215546Sopenharmony_ci var->data.mode = nir_var_shader_temp; 3077bf215546Sopenharmony_ci fixup_derefs = true; 3078bf215546Sopenharmony_ci } 3079bf215546Sopenharmony_ci } 3080bf215546Sopenharmony_ci if (fixup_derefs) { 3081bf215546Sopenharmony_ci NIR_PASS_V(ordered_shaders[i], nir_fixup_deref_modes); 3082bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_remove_dead_variables, nir_var_shader_temp, 3083bf215546Sopenharmony_ci NULL); 3084bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_opt_dce); 3085bf215546Sopenharmony_ci } 3086bf215546Sopenharmony_ci continue; 3087bf215546Sopenharmony_ci } 3088bf215546Sopenharmony_ci 3089bf215546Sopenharmony_ci /* Remove PSIZ from shaders when it's not needed. 
3090bf215546Sopenharmony_ci * This is typically produced by translation layers like Zink or D9VK. 3091bf215546Sopenharmony_ci */ 3092bf215546Sopenharmony_ci if (uses_xfb || !(info->outputs_written & VARYING_BIT_PSIZ)) 3093bf215546Sopenharmony_ci continue; 3094bf215546Sopenharmony_ci 3095bf215546Sopenharmony_ci bool next_stage_needs_psiz = 3096bf215546Sopenharmony_ci i != 0 && /* ordered_shaders is backwards, so next stage is: i - 1 */ 3097bf215546Sopenharmony_ci ordered_shaders[i - 1]->info.inputs_read & VARYING_BIT_PSIZ; 3098bf215546Sopenharmony_ci bool topology_uses_psiz = 3099bf215546Sopenharmony_ci info->stage == last_vgt_api_stage && 3100bf215546Sopenharmony_ci ((info->stage == MESA_SHADER_VERTEX && pipeline_key->vs.topology == V_008958_DI_PT_POINTLIST) || 3101bf215546Sopenharmony_ci (info->stage == MESA_SHADER_TESS_EVAL && info->tess.point_mode) || 3102bf215546Sopenharmony_ci (info->stage == MESA_SHADER_GEOMETRY && info->gs.output_primitive == SHADER_PRIM_POINTS) || 3103bf215546Sopenharmony_ci (info->stage == MESA_SHADER_MESH && info->mesh.primitive_type == SHADER_PRIM_POINTS)); 3104bf215546Sopenharmony_ci 3105bf215546Sopenharmony_ci nir_variable *psiz_var = 3106bf215546Sopenharmony_ci nir_find_variable_with_location(ordered_shaders[i], nir_var_shader_out, VARYING_SLOT_PSIZ); 3107bf215546Sopenharmony_ci 3108bf215546Sopenharmony_ci if (!next_stage_needs_psiz && !topology_uses_psiz && psiz_var) { 3109bf215546Sopenharmony_ci /* Change PSIZ to a global variable which allows it to be DCE'd. 
*/ 3110bf215546Sopenharmony_ci psiz_var->data.location = 0; 3111bf215546Sopenharmony_ci psiz_var->data.mode = nir_var_shader_temp; 3112bf215546Sopenharmony_ci 3113bf215546Sopenharmony_ci info->outputs_written &= ~VARYING_BIT_PSIZ; 3114bf215546Sopenharmony_ci NIR_PASS_V(ordered_shaders[i], nir_fixup_deref_modes); 3115bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_remove_dead_variables, nir_var_shader_temp, NULL); 3116bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_opt_dce); 3117bf215546Sopenharmony_ci } 3118bf215546Sopenharmony_ci } 3119bf215546Sopenharmony_ci } 3120bf215546Sopenharmony_ci 3121bf215546Sopenharmony_ci /* Lower the viewport index to zero when the last vertex stage doesn't export it. */ 3122bf215546Sopenharmony_ci if (stages[MESA_SHADER_FRAGMENT].nir && 3123bf215546Sopenharmony_ci (stages[MESA_SHADER_FRAGMENT].nir->info.inputs_read & VARYING_BIT_VIEWPORT) && 3124bf215546Sopenharmony_ci !(stages[last_vgt_api_stage].nir->info.outputs_written & VARYING_BIT_VIEWPORT)) { 3125bf215546Sopenharmony_ci NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_lower_viewport_to_zero); 3126bf215546Sopenharmony_ci } 3127bf215546Sopenharmony_ci 3128bf215546Sopenharmony_ci /* Export the layer in the last VGT stage if multiview is used. 
*/ 3129bf215546Sopenharmony_ci if (pipeline_key->has_multiview_view_index && last_vgt_api_stage != -1 && 3130bf215546Sopenharmony_ci !(stages[last_vgt_api_stage].nir->info.outputs_written & 3131bf215546Sopenharmony_ci VARYING_BIT_LAYER)) { 3132bf215546Sopenharmony_ci nir_shader *last_vgt_shader = stages[last_vgt_api_stage].nir; 3133bf215546Sopenharmony_ci NIR_PASS(_, last_vgt_shader, radv_lower_multiview); 3134bf215546Sopenharmony_ci } 3135bf215546Sopenharmony_ci 3136bf215546Sopenharmony_ci for (int i = 1; !optimize_conservatively && (i < shader_count); ++i) { 3137bf215546Sopenharmony_ci if (nir_link_opt_varyings(ordered_shaders[i], ordered_shaders[i - 1])) { 3138bf215546Sopenharmony_ci nir_validate_shader(ordered_shaders[i], "after nir_link_opt_varyings"); 3139bf215546Sopenharmony_ci nir_validate_shader(ordered_shaders[i - 1], "after nir_link_opt_varyings"); 3140bf215546Sopenharmony_ci 3141bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i - 1], nir_opt_constant_folding); 3142bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i - 1], nir_opt_algebraic); 3143bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i - 1], nir_opt_dce); 3144bf215546Sopenharmony_ci } 3145bf215546Sopenharmony_ci 3146bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_remove_dead_variables, nir_var_shader_out, NULL); 3147bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i - 1], nir_remove_dead_variables, nir_var_shader_in, NULL); 3148bf215546Sopenharmony_ci 3149bf215546Sopenharmony_ci bool progress = nir_remove_unused_varyings(ordered_shaders[i], ordered_shaders[i - 1]); 3150bf215546Sopenharmony_ci 3151bf215546Sopenharmony_ci nir_compact_varyings(ordered_shaders[i], ordered_shaders[i - 1], true); 3152bf215546Sopenharmony_ci nir_validate_shader(ordered_shaders[i], "after nir_compact_varyings"); 3153bf215546Sopenharmony_ci nir_validate_shader(ordered_shaders[i - 1], "after nir_compact_varyings"); 3154bf215546Sopenharmony_ci if (ordered_shaders[i]->info.stage == MESA_SHADER_MESH) 
{ 3155bf215546Sopenharmony_ci /* nir_compact_varyings can change the location of per-vertex and per-primitive outputs */ 3156bf215546Sopenharmony_ci nir_shader_gather_info(ordered_shaders[i], nir_shader_get_entrypoint(ordered_shaders[i])); 3157bf215546Sopenharmony_ci } 3158bf215546Sopenharmony_ci 3159bf215546Sopenharmony_ci if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL || 3160bf215546Sopenharmony_ci ordered_shaders[i]->info.stage == MESA_SHADER_MESH || 3161bf215546Sopenharmony_ci (ordered_shaders[i]->info.stage == MESA_SHADER_VERTEX && has_geom_tess) || 3162bf215546Sopenharmony_ci (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_EVAL && merged_gs)) { 3163bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_lower_io_to_vector, nir_var_shader_out); 3164bf215546Sopenharmony_ci if (ordered_shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) 3165bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_vectorize_tess_levels); 3166bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_opt_combine_stores, nir_var_shader_out); 3167bf215546Sopenharmony_ci } 3168bf215546Sopenharmony_ci if (ordered_shaders[i - 1]->info.stage == MESA_SHADER_GEOMETRY || 3169bf215546Sopenharmony_ci ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_CTRL || 3170bf215546Sopenharmony_ci ordered_shaders[i - 1]->info.stage == MESA_SHADER_TESS_EVAL) { 3171bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i - 1], nir_lower_io_to_vector, nir_var_shader_in); 3172bf215546Sopenharmony_ci } 3173bf215546Sopenharmony_ci 3174bf215546Sopenharmony_ci if (progress) { 3175bf215546Sopenharmony_ci progress = false; 3176bf215546Sopenharmony_ci NIR_PASS(progress, ordered_shaders[i], nir_lower_global_vars_to_local); 3177bf215546Sopenharmony_ci if (progress) { 3178bf215546Sopenharmony_ci ac_nir_lower_indirect_derefs(ordered_shaders[i], pdevice->rad_info.gfx_level); 3179bf215546Sopenharmony_ci /* remove dead writes, which can remove input loads */ 3180bf215546Sopenharmony_ci NIR_PASS(_, 
ordered_shaders[i], nir_lower_vars_to_ssa); 3181bf215546Sopenharmony_ci NIR_PASS(_, ordered_shaders[i], nir_opt_dce); 3182bf215546Sopenharmony_ci } 3183bf215546Sopenharmony_ci 3184bf215546Sopenharmony_ci progress = false; 3185bf215546Sopenharmony_ci NIR_PASS(progress, ordered_shaders[i - 1], nir_lower_global_vars_to_local); 3186bf215546Sopenharmony_ci if (progress) { 3187bf215546Sopenharmony_ci ac_nir_lower_indirect_derefs(ordered_shaders[i - 1], pdevice->rad_info.gfx_level); 3188bf215546Sopenharmony_ci } 3189bf215546Sopenharmony_ci } 3190bf215546Sopenharmony_ci } 3191bf215546Sopenharmony_ci} 3192bf215546Sopenharmony_ci 3193bf215546Sopenharmony_cistatic void 3194bf215546Sopenharmony_ciradv_set_driver_locations(struct radv_pipeline *pipeline, struct radv_pipeline_stage *stages, 3195bf215546Sopenharmony_ci gl_shader_stage last_vgt_api_stage) 3196bf215546Sopenharmony_ci{ 3197bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->device->physical_device; 3198bf215546Sopenharmony_ci 3199bf215546Sopenharmony_ci if (stages[MESA_SHADER_FRAGMENT].nir) { 3200bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, stages[MESA_SHADER_FRAGMENT].nir) 3201bf215546Sopenharmony_ci { 3202bf215546Sopenharmony_ci var->data.driver_location = var->data.location + var->data.index; 3203bf215546Sopenharmony_ci } 3204bf215546Sopenharmony_ci } 3205bf215546Sopenharmony_ci 3206bf215546Sopenharmony_ci if (stages[MESA_SHADER_MESH].nir) { 3207bf215546Sopenharmony_ci /* ac_nir_lower_ngg ignores driver locations for mesh shaders, 3208bf215546Sopenharmony_ci * but set them to all zero just to be on the safe side. 
3209bf215546Sopenharmony_ci */ 3210bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, stages[MESA_SHADER_MESH].nir) { 3211bf215546Sopenharmony_ci var->data.driver_location = 0; 3212bf215546Sopenharmony_ci } 3213bf215546Sopenharmony_ci return; 3214bf215546Sopenharmony_ci } 3215bf215546Sopenharmony_ci 3216bf215546Sopenharmony_ci if (!stages[MESA_SHADER_VERTEX].nir) 3217bf215546Sopenharmony_ci return; 3218bf215546Sopenharmony_ci 3219bf215546Sopenharmony_ci bool has_tess = stages[MESA_SHADER_TESS_CTRL].nir; 3220bf215546Sopenharmony_ci bool has_gs = stages[MESA_SHADER_GEOMETRY].nir; 3221bf215546Sopenharmony_ci 3222bf215546Sopenharmony_ci /* Merged stage for VS and TES */ 3223bf215546Sopenharmony_ci unsigned vs_info_idx = MESA_SHADER_VERTEX; 3224bf215546Sopenharmony_ci unsigned tes_info_idx = MESA_SHADER_TESS_EVAL; 3225bf215546Sopenharmony_ci 3226bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX9) { 3227bf215546Sopenharmony_ci /* These are merged into the next stage */ 3228bf215546Sopenharmony_ci vs_info_idx = has_tess ? MESA_SHADER_TESS_CTRL : MESA_SHADER_GEOMETRY; 3229bf215546Sopenharmony_ci tes_info_idx = has_gs ? 
MESA_SHADER_GEOMETRY : MESA_SHADER_TESS_EVAL; 3230bf215546Sopenharmony_ci } 3231bf215546Sopenharmony_ci 3232bf215546Sopenharmony_ci nir_foreach_shader_in_variable (var, stages[MESA_SHADER_VERTEX].nir) { 3233bf215546Sopenharmony_ci var->data.driver_location = var->data.location; 3234bf215546Sopenharmony_ci } 3235bf215546Sopenharmony_ci 3236bf215546Sopenharmony_ci if (has_tess) { 3237bf215546Sopenharmony_ci nir_linked_io_var_info vs2tcs = nir_assign_linked_io_var_locations( 3238bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].nir, stages[MESA_SHADER_TESS_CTRL].nir); 3239bf215546Sopenharmony_ci nir_linked_io_var_info tcs2tes = nir_assign_linked_io_var_locations( 3240bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].nir, stages[MESA_SHADER_TESS_EVAL].nir); 3241bf215546Sopenharmony_ci 3242bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.vs.num_linked_outputs = vs2tcs.num_linked_io_vars; 3243bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs = vs2tcs.num_linked_io_vars; 3244bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs = tcs2tes.num_linked_io_vars; 3245bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs = tcs2tes.num_linked_patch_io_vars; 3246bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].info.tes.num_linked_inputs = tcs2tes.num_linked_io_vars; 3247bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].info.tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars; 3248bf215546Sopenharmony_ci 3249bf215546Sopenharmony_ci /* Copy data to merged stage */ 3250bf215546Sopenharmony_ci stages[vs_info_idx].info.vs.num_linked_outputs = vs2tcs.num_linked_io_vars; 3251bf215546Sopenharmony_ci stages[tes_info_idx].info.tes.num_linked_inputs = tcs2tes.num_linked_io_vars; 3252bf215546Sopenharmony_ci stages[tes_info_idx].info.tes.num_linked_patch_inputs = tcs2tes.num_linked_patch_io_vars; 3253bf215546Sopenharmony_ci 3254bf215546Sopenharmony_ci if (has_gs) { 
3255bf215546Sopenharmony_ci nir_linked_io_var_info tes2gs = nir_assign_linked_io_var_locations( 3256bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].nir, stages[MESA_SHADER_GEOMETRY].nir); 3257bf215546Sopenharmony_ci 3258bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].info.tes.num_linked_outputs = tes2gs.num_linked_io_vars; 3259bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].info.gs.num_linked_inputs = tes2gs.num_linked_io_vars; 3260bf215546Sopenharmony_ci 3261bf215546Sopenharmony_ci /* Copy data to merged stage */ 3262bf215546Sopenharmony_ci stages[tes_info_idx].info.tes.num_linked_outputs = tes2gs.num_linked_io_vars; 3263bf215546Sopenharmony_ci } 3264bf215546Sopenharmony_ci } else if (has_gs) { 3265bf215546Sopenharmony_ci nir_linked_io_var_info vs2gs = nir_assign_linked_io_var_locations( 3266bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].nir, stages[MESA_SHADER_GEOMETRY].nir); 3267bf215546Sopenharmony_ci 3268bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.vs.num_linked_outputs = vs2gs.num_linked_io_vars; 3269bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].info.gs.num_linked_inputs = vs2gs.num_linked_io_vars; 3270bf215546Sopenharmony_ci 3271bf215546Sopenharmony_ci /* Copy data to merged stage */ 3272bf215546Sopenharmony_ci stages[vs_info_idx].info.vs.num_linked_outputs = vs2gs.num_linked_io_vars; 3273bf215546Sopenharmony_ci } 3274bf215546Sopenharmony_ci 3275bf215546Sopenharmony_ci assert(last_vgt_api_stage != MESA_SHADER_NONE); 3276bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, stages[last_vgt_api_stage].nir) 3277bf215546Sopenharmony_ci { 3278bf215546Sopenharmony_ci var->data.driver_location = var->data.location; 3279bf215546Sopenharmony_ci } 3280bf215546Sopenharmony_ci} 3281bf215546Sopenharmony_ci 3282bf215546Sopenharmony_cistatic struct radv_pipeline_key 3283bf215546Sopenharmony_ciradv_generate_pipeline_key(const struct radv_pipeline *pipeline, VkPipelineCreateFlags flags) 3284bf215546Sopenharmony_ci{ 
3285bf215546Sopenharmony_ci struct radv_device *device = pipeline->device; 3286bf215546Sopenharmony_ci struct radv_pipeline_key key; 3287bf215546Sopenharmony_ci 3288bf215546Sopenharmony_ci memset(&key, 0, sizeof(key)); 3289bf215546Sopenharmony_ci 3290bf215546Sopenharmony_ci if (flags & VK_PIPELINE_CREATE_DISABLE_OPTIMIZATION_BIT) 3291bf215546Sopenharmony_ci key.optimisations_disabled = 1; 3292bf215546Sopenharmony_ci 3293bf215546Sopenharmony_ci key.disable_aniso_single_level = device->instance->disable_aniso_single_level && 3294bf215546Sopenharmony_ci device->physical_device->rad_info.gfx_level < GFX8; 3295bf215546Sopenharmony_ci 3296bf215546Sopenharmony_ci key.image_2d_view_of_3d = device->image_2d_view_of_3d && 3297bf215546Sopenharmony_ci device->physical_device->rad_info.gfx_level == GFX9; 3298bf215546Sopenharmony_ci 3299bf215546Sopenharmony_ci return key; 3300bf215546Sopenharmony_ci} 3301bf215546Sopenharmony_ci 3302bf215546Sopenharmony_cistatic struct radv_pipeline_key 3303bf215546Sopenharmony_ciradv_generate_graphics_pipeline_key(const struct radv_graphics_pipeline *pipeline, 3304bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 3305bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info, 3306bf215546Sopenharmony_ci const struct radv_blend_state *blend) 3307bf215546Sopenharmony_ci{ 3308bf215546Sopenharmony_ci struct radv_device *device = pipeline->base.device; 3309bf215546Sopenharmony_ci struct radv_pipeline_key key = radv_generate_pipeline_key(&pipeline->base, pCreateInfo->flags); 3310bf215546Sopenharmony_ci 3311bf215546Sopenharmony_ci key.has_multiview_view_index = !!info->ri.view_mask; 3312bf215546Sopenharmony_ci 3313bf215546Sopenharmony_ci if (pipeline->dynamic_states & RADV_DYNAMIC_VERTEX_INPUT) { 3314bf215546Sopenharmony_ci key.vs.dynamic_input_state = true; 3315bf215546Sopenharmony_ci } 3316bf215546Sopenharmony_ci 3317bf215546Sopenharmony_ci /* Vertex input state */ 3318bf215546Sopenharmony_ci 
key.vs.instance_rate_inputs = info->vi.instance_rate_inputs; 3319bf215546Sopenharmony_ci key.vs.vertex_post_shuffle = info->vi.vertex_post_shuffle; 3320bf215546Sopenharmony_ci 3321bf215546Sopenharmony_ci for (uint32_t i = 0; i < MAX_VERTEX_ATTRIBS; i++) { 3322bf215546Sopenharmony_ci key.vs.instance_rate_divisors[i] = info->vi.instance_rate_divisors[i]; 3323bf215546Sopenharmony_ci key.vs.vertex_attribute_formats[i] = info->vi.vertex_attribute_formats[i]; 3324bf215546Sopenharmony_ci key.vs.vertex_attribute_bindings[i] = info->vi.vertex_attribute_bindings[i]; 3325bf215546Sopenharmony_ci key.vs.vertex_attribute_offsets[i] = info->vi.vertex_attribute_offsets[i]; 3326bf215546Sopenharmony_ci key.vs.vertex_attribute_strides[i] = info->vi.vertex_attribute_strides[i]; 3327bf215546Sopenharmony_ci key.vs.vertex_alpha_adjust[i] = info->vi.vertex_alpha_adjust[i]; 3328bf215546Sopenharmony_ci } 3329bf215546Sopenharmony_ci 3330bf215546Sopenharmony_ci for (uint32_t i = 0; i < MAX_VBS; i++) { 3331bf215546Sopenharmony_ci key.vs.vertex_binding_align[i] = info->vi.vertex_binding_align[i]; 3332bf215546Sopenharmony_ci } 3333bf215546Sopenharmony_ci 3334bf215546Sopenharmony_ci key.tcs.tess_input_vertices = info->ts.patch_control_points; 3335bf215546Sopenharmony_ci 3336bf215546Sopenharmony_ci if (info->ms.raster_samples > 1) { 3337bf215546Sopenharmony_ci uint32_t ps_iter_samples = radv_pipeline_get_ps_iter_samples(info); 3338bf215546Sopenharmony_ci key.ps.num_samples = info->ms.raster_samples; 3339bf215546Sopenharmony_ci key.ps.log2_ps_iter_samples = util_logbase2(ps_iter_samples); 3340bf215546Sopenharmony_ci } 3341bf215546Sopenharmony_ci 3342bf215546Sopenharmony_ci key.ps.col_format = blend->spi_shader_col_format; 3343bf215546Sopenharmony_ci key.ps.cb_target_mask = blend->cb_target_mask; 3344bf215546Sopenharmony_ci key.ps.mrt0_is_dual_src = blend->mrt0_is_dual_src; 3345bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level < GFX8) { 3346bf215546Sopenharmony_ci 
key.ps.is_int8 = blend->col_format_is_int8; 3347bf215546Sopenharmony_ci key.ps.is_int10 = blend->col_format_is_int10; 3348bf215546Sopenharmony_ci } 3349bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX11) { 3350bf215546Sopenharmony_ci key.ps.alpha_to_coverage_via_mrtz = info->ms.alpha_to_coverage_enable; 3351bf215546Sopenharmony_ci } 3352bf215546Sopenharmony_ci 3353bf215546Sopenharmony_ci key.vs.topology = info->ia.primitive_topology; 3354bf215546Sopenharmony_ci 3355bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX10) { 3356bf215546Sopenharmony_ci key.vs.provoking_vtx_last = info->rs.provoking_vtx_last; 3357bf215546Sopenharmony_ci } 3358bf215546Sopenharmony_ci 3359bf215546Sopenharmony_ci if (device->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE) 3360bf215546Sopenharmony_ci key.ps.lower_discard_to_demote = true; 3361bf215546Sopenharmony_ci 3362bf215546Sopenharmony_ci if (device->instance->enable_mrt_output_nan_fixup) 3363bf215546Sopenharmony_ci key.ps.enable_mrt_output_nan_fixup = blend->col_format_is_float32; 3364bf215546Sopenharmony_ci 3365bf215546Sopenharmony_ci 3366bf215546Sopenharmony_ci key.ps.force_vrs_enabled = device->force_vrs_enabled; 3367bf215546Sopenharmony_ci 3368bf215546Sopenharmony_ci if (device->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM) 3369bf215546Sopenharmony_ci key.invariant_geom = true; 3370bf215546Sopenharmony_ci 3371bf215546Sopenharmony_ci key.use_ngg = device->physical_device->use_ngg; 3372bf215546Sopenharmony_ci 3373bf215546Sopenharmony_ci if ((radv_is_vrs_enabled(pipeline, info) || device->force_vrs_enabled) && 3374bf215546Sopenharmony_ci (device->physical_device->rad_info.family == CHIP_NAVI21 || 3375bf215546Sopenharmony_ci device->physical_device->rad_info.family == CHIP_NAVI22 || 3376bf215546Sopenharmony_ci device->physical_device->rad_info.family == CHIP_VANGOGH)) 3377bf215546Sopenharmony_ci key.adjust_frag_coord_z = true; 3378bf215546Sopenharmony_ci 
3379bf215546Sopenharmony_ci if (device->instance->disable_sinking_load_input_fs) 3380bf215546Sopenharmony_ci key.disable_sinking_load_input_fs = true; 3381bf215546Sopenharmony_ci 3382bf215546Sopenharmony_ci if (device->primitives_generated_query) 3383bf215546Sopenharmony_ci key.primitives_generated_query = true; 3384bf215546Sopenharmony_ci 3385bf215546Sopenharmony_ci key.ps.has_epilog = false; /* TODO: hook up PS epilogs */ 3386bf215546Sopenharmony_ci 3387bf215546Sopenharmony_ci return key; 3388bf215546Sopenharmony_ci} 3389bf215546Sopenharmony_ci 3390bf215546Sopenharmony_cistatic uint8_t 3391bf215546Sopenharmony_ciradv_get_wave_size(struct radv_device *device, gl_shader_stage stage, 3392bf215546Sopenharmony_ci const struct radv_shader_info *info) 3393bf215546Sopenharmony_ci{ 3394bf215546Sopenharmony_ci if (stage == MESA_SHADER_GEOMETRY && !info->is_ngg) 3395bf215546Sopenharmony_ci return 64; 3396bf215546Sopenharmony_ci else if (stage == MESA_SHADER_COMPUTE) { 3397bf215546Sopenharmony_ci return info->cs.subgroup_size; 3398bf215546Sopenharmony_ci } else if (stage == MESA_SHADER_FRAGMENT) 3399bf215546Sopenharmony_ci return device->physical_device->ps_wave_size; 3400bf215546Sopenharmony_ci else if (stage == MESA_SHADER_TASK) 3401bf215546Sopenharmony_ci return device->physical_device->cs_wave_size; 3402bf215546Sopenharmony_ci else 3403bf215546Sopenharmony_ci return device->physical_device->ge_wave_size; 3404bf215546Sopenharmony_ci} 3405bf215546Sopenharmony_ci 3406bf215546Sopenharmony_cistatic uint8_t 3407bf215546Sopenharmony_ciradv_get_ballot_bit_size(struct radv_device *device, gl_shader_stage stage, 3408bf215546Sopenharmony_ci const struct radv_shader_info *info) 3409bf215546Sopenharmony_ci{ 3410bf215546Sopenharmony_ci if (stage == MESA_SHADER_COMPUTE && info->cs.subgroup_size) 3411bf215546Sopenharmony_ci return info->cs.subgroup_size; 3412bf215546Sopenharmony_ci return 64; 3413bf215546Sopenharmony_ci} 3414bf215546Sopenharmony_ci 3415bf215546Sopenharmony_cistatic void 
3416bf215546Sopenharmony_ciradv_determine_ngg_settings(struct radv_pipeline *pipeline, 3417bf215546Sopenharmony_ci const struct radv_pipeline_key *pipeline_key, 3418bf215546Sopenharmony_ci struct radv_pipeline_stage *stages, 3419bf215546Sopenharmony_ci gl_shader_stage last_vgt_api_stage) 3420bf215546Sopenharmony_ci{ 3421bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->device->physical_device; 3422bf215546Sopenharmony_ci 3423bf215546Sopenharmony_ci /* Shader settings for VS or TES without GS. */ 3424bf215546Sopenharmony_ci if (last_vgt_api_stage == MESA_SHADER_VERTEX || 3425bf215546Sopenharmony_ci last_vgt_api_stage == MESA_SHADER_TESS_EVAL) { 3426bf215546Sopenharmony_ci uint64_t ps_inputs_read = 3427bf215546Sopenharmony_ci stages[MESA_SHADER_FRAGMENT].nir ? stages[MESA_SHADER_FRAGMENT].nir->info.inputs_read : 0; 3428bf215546Sopenharmony_ci gl_shader_stage es_stage = last_vgt_api_stage; 3429bf215546Sopenharmony_ci 3430bf215546Sopenharmony_ci unsigned num_vertices_per_prim = si_conv_prim_to_gs_out(pipeline_key->vs.topology) + 1; 3431bf215546Sopenharmony_ci if (es_stage == MESA_SHADER_TESS_EVAL) 3432bf215546Sopenharmony_ci num_vertices_per_prim = stages[es_stage].nir->info.tess.point_mode ? 1 3433bf215546Sopenharmony_ci : stages[es_stage].nir->info.tess._primitive_mode == TESS_PRIMITIVE_ISOLINES ? 2 3434bf215546Sopenharmony_ci : 3; 3435bf215546Sopenharmony_ci /* TODO: Enable culling for LLVM. 
*/ 3436bf215546Sopenharmony_ci stages[es_stage].info.has_ngg_culling = radv_consider_culling( 3437bf215546Sopenharmony_ci pdevice, stages[es_stage].nir, ps_inputs_read, num_vertices_per_prim, &stages[es_stage].info) && 3438bf215546Sopenharmony_ci !radv_use_llvm_for_stage(pipeline->device, es_stage); 3439bf215546Sopenharmony_ci 3440bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(stages[es_stage].nir); 3441bf215546Sopenharmony_ci stages[es_stage].info.has_ngg_early_prim_export = exec_list_is_singular(&impl->body); 3442bf215546Sopenharmony_ci 3443bf215546Sopenharmony_ci /* Invocations that process an input vertex */ 3444bf215546Sopenharmony_ci const struct gfx10_ngg_info *ngg_info = &stages[es_stage].info.ngg_info; 3445bf215546Sopenharmony_ci unsigned max_vtx_in = MIN2(256, ngg_info->enable_vertex_grouping ? ngg_info->hw_max_esverts : num_vertices_per_prim * ngg_info->max_gsprims); 3446bf215546Sopenharmony_ci 3447bf215546Sopenharmony_ci unsigned lds_bytes_if_culling_off = 0; 3448bf215546Sopenharmony_ci /* We need LDS space when VS needs to export the primitive ID. */ 3449bf215546Sopenharmony_ci if (es_stage == MESA_SHADER_VERTEX && stages[es_stage].info.vs.outinfo.export_prim_id) 3450bf215546Sopenharmony_ci lds_bytes_if_culling_off = max_vtx_in * 4u; 3451bf215546Sopenharmony_ci stages[es_stage].info.num_lds_blocks_when_not_culling = 3452bf215546Sopenharmony_ci DIV_ROUND_UP(lds_bytes_if_culling_off, pdevice->rad_info.lds_encode_granularity); 3453bf215546Sopenharmony_ci 3454bf215546Sopenharmony_ci /* NGG passthrough mode should be disabled when culling and when the vertex shader exports the 3455bf215546Sopenharmony_ci * primitive ID. 
3456bf215546Sopenharmony_ci */ 3457bf215546Sopenharmony_ci stages[es_stage].info.is_ngg_passthrough = stages[es_stage].info.is_ngg_passthrough && 3458bf215546Sopenharmony_ci !stages[es_stage].info.has_ngg_culling && 3459bf215546Sopenharmony_ci !(es_stage == MESA_SHADER_VERTEX && 3460bf215546Sopenharmony_ci stages[es_stage].info.vs.outinfo.export_prim_id); 3461bf215546Sopenharmony_ci } 3462bf215546Sopenharmony_ci} 3463bf215546Sopenharmony_ci 3464bf215546Sopenharmony_cistatic void 3465bf215546Sopenharmony_ciradv_fill_shader_info_ngg(struct radv_pipeline *pipeline, 3466bf215546Sopenharmony_ci const struct radv_pipeline_key *pipeline_key, 3467bf215546Sopenharmony_ci struct radv_pipeline_stage *stages) 3468bf215546Sopenharmony_ci{ 3469bf215546Sopenharmony_ci struct radv_device *device = pipeline->device; 3470bf215546Sopenharmony_ci 3471bf215546Sopenharmony_ci if (pipeline_key->use_ngg) { 3472bf215546Sopenharmony_ci if (stages[MESA_SHADER_TESS_CTRL].nir) { 3473bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].info.is_ngg = true; 3474bf215546Sopenharmony_ci } else if (stages[MESA_SHADER_VERTEX].nir) { 3475bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.is_ngg = true; 3476bf215546Sopenharmony_ci } else if (stages[MESA_SHADER_MESH].nir) { 3477bf215546Sopenharmony_ci stages[MESA_SHADER_MESH].info.is_ngg = true; 3478bf215546Sopenharmony_ci } 3479bf215546Sopenharmony_ci 3480bf215546Sopenharmony_ci if (stages[MESA_SHADER_TESS_CTRL].nir && stages[MESA_SHADER_GEOMETRY].nir && 3481bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].nir->info.gs.invocations * 3482bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].nir->info.gs.vertices_out > 3483bf215546Sopenharmony_ci 256) { 3484bf215546Sopenharmony_ci /* Fallback to the legacy path if tessellation is 3485bf215546Sopenharmony_ci * enabled with extreme geometry because 3486bf215546Sopenharmony_ci * EN_MAX_VERT_OUT_PER_GS_INSTANCE doesn't work and it 3487bf215546Sopenharmony_ci * might hang. 
3488bf215546Sopenharmony_ci */ 3489bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false; 3490bf215546Sopenharmony_ci 3491bf215546Sopenharmony_ci /* GFX11+ requires NGG. */ 3492bf215546Sopenharmony_ci assert(device->physical_device->rad_info.gfx_level < GFX11); 3493bf215546Sopenharmony_ci } 3494bf215546Sopenharmony_ci 3495bf215546Sopenharmony_ci gl_shader_stage last_xfb_stage = MESA_SHADER_VERTEX; 3496bf215546Sopenharmony_ci 3497bf215546Sopenharmony_ci for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) { 3498bf215546Sopenharmony_ci if (stages[i].nir) 3499bf215546Sopenharmony_ci last_xfb_stage = i; 3500bf215546Sopenharmony_ci } 3501bf215546Sopenharmony_ci 3502bf215546Sopenharmony_ci bool uses_xfb = stages[last_xfb_stage].nir && 3503bf215546Sopenharmony_ci stages[last_xfb_stage].nir->xfb_info; 3504bf215546Sopenharmony_ci 3505bf215546Sopenharmony_ci if (!device->physical_device->use_ngg_streamout && uses_xfb) { 3506bf215546Sopenharmony_ci /* GFX11+ requires NGG. */ 3507bf215546Sopenharmony_ci assert(device->physical_device->rad_info.gfx_level < GFX11); 3508bf215546Sopenharmony_ci 3509bf215546Sopenharmony_ci if (stages[MESA_SHADER_TESS_CTRL].nir) 3510bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].info.is_ngg = false; 3511bf215546Sopenharmony_ci else 3512bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.is_ngg = false; 3513bf215546Sopenharmony_ci } 3514bf215546Sopenharmony_ci 3515bf215546Sopenharmony_ci /* Determine if the pipeline is eligible for the NGG passthrough 3516bf215546Sopenharmony_ci * mode. It can't be enabled for geometry shaders, for NGG 3517bf215546Sopenharmony_ci * streamout or for vertex shaders that export the primitive ID 3518bf215546Sopenharmony_ci * (this is checked later because we don't have the info here.) 
3519bf215546Sopenharmony_ci */ 3520bf215546Sopenharmony_ci if (!stages[MESA_SHADER_GEOMETRY].nir && !uses_xfb) { 3521bf215546Sopenharmony_ci if (stages[MESA_SHADER_TESS_CTRL].nir && stages[MESA_SHADER_TESS_EVAL].info.is_ngg) { 3522bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].info.is_ngg_passthrough = true; 3523bf215546Sopenharmony_ci } else if (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.is_ngg) { 3524bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.is_ngg_passthrough = true; 3525bf215546Sopenharmony_ci } 3526bf215546Sopenharmony_ci } 3527bf215546Sopenharmony_ci } 3528bf215546Sopenharmony_ci} 3529bf215546Sopenharmony_ci 3530bf215546Sopenharmony_cistatic void 3531bf215546Sopenharmony_ciradv_fill_shader_info(struct radv_pipeline *pipeline, 3532bf215546Sopenharmony_ci struct radv_pipeline_layout *pipeline_layout, 3533bf215546Sopenharmony_ci const struct radv_pipeline_key *pipeline_key, 3534bf215546Sopenharmony_ci struct radv_pipeline_stage *stages, 3535bf215546Sopenharmony_ci gl_shader_stage last_vgt_api_stage) 3536bf215546Sopenharmony_ci{ 3537bf215546Sopenharmony_ci struct radv_device *device = pipeline->device; 3538bf215546Sopenharmony_ci unsigned active_stages = 0; 3539bf215546Sopenharmony_ci unsigned filled_stages = 0; 3540bf215546Sopenharmony_ci 3541bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { 3542bf215546Sopenharmony_ci if (stages[i].nir) 3543bf215546Sopenharmony_ci active_stages |= (1 << i); 3544bf215546Sopenharmony_ci } 3545bf215546Sopenharmony_ci 3546bf215546Sopenharmony_ci if (stages[MESA_SHADER_TESS_CTRL].nir) { 3547bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.vs.as_ls = true; 3548bf215546Sopenharmony_ci } 3549bf215546Sopenharmony_ci 3550bf215546Sopenharmony_ci if (stages[MESA_SHADER_GEOMETRY].nir) { 3551bf215546Sopenharmony_ci if (stages[MESA_SHADER_TESS_CTRL].nir) 3552bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].info.tes.as_es = true; 3553bf215546Sopenharmony_ci 
else 3554bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.vs.as_es = true; 3555bf215546Sopenharmony_ci } 3556bf215546Sopenharmony_ci 3557bf215546Sopenharmony_ci if (stages[MESA_SHADER_FRAGMENT].nir) { 3558bf215546Sopenharmony_ci radv_nir_shader_info_init(&stages[MESA_SHADER_FRAGMENT].info); 3559bf215546Sopenharmony_ci radv_nir_shader_info_pass(device, stages[MESA_SHADER_FRAGMENT].nir, pipeline_layout, 3560bf215546Sopenharmony_ci pipeline_key, &stages[MESA_SHADER_FRAGMENT].info); 3561bf215546Sopenharmony_ci 3562bf215546Sopenharmony_ci assert(last_vgt_api_stage != MESA_SHADER_NONE); 3563bf215546Sopenharmony_ci struct radv_shader_info *pre_ps_info = &stages[last_vgt_api_stage].info; 3564bf215546Sopenharmony_ci struct radv_vs_output_info *outinfo = NULL; 3565bf215546Sopenharmony_ci if (last_vgt_api_stage == MESA_SHADER_VERTEX || 3566bf215546Sopenharmony_ci last_vgt_api_stage == MESA_SHADER_GEOMETRY) { 3567bf215546Sopenharmony_ci outinfo = &pre_ps_info->vs.outinfo; 3568bf215546Sopenharmony_ci } else if (last_vgt_api_stage == MESA_SHADER_TESS_EVAL) { 3569bf215546Sopenharmony_ci outinfo = &pre_ps_info->tes.outinfo; 3570bf215546Sopenharmony_ci } else if (last_vgt_api_stage == MESA_SHADER_MESH) { 3571bf215546Sopenharmony_ci outinfo = &pre_ps_info->ms.outinfo; 3572bf215546Sopenharmony_ci } 3573bf215546Sopenharmony_ci 3574bf215546Sopenharmony_ci /* Add PS input requirements to the output of the pre-PS stage. 
*/ 3575bf215546Sopenharmony_ci bool ps_prim_id_in = stages[MESA_SHADER_FRAGMENT].info.ps.prim_id_input; 3576bf215546Sopenharmony_ci bool ps_clip_dists_in = !!stages[MESA_SHADER_FRAGMENT].info.ps.num_input_clips_culls; 3577bf215546Sopenharmony_ci 3578bf215546Sopenharmony_ci assert(outinfo); 3579bf215546Sopenharmony_ci outinfo->export_clip_dists |= ps_clip_dists_in; 3580bf215546Sopenharmony_ci if (last_vgt_api_stage == MESA_SHADER_VERTEX || 3581bf215546Sopenharmony_ci last_vgt_api_stage == MESA_SHADER_TESS_EVAL) { 3582bf215546Sopenharmony_ci outinfo->export_prim_id |= ps_prim_id_in; 3583bf215546Sopenharmony_ci } 3584bf215546Sopenharmony_ci 3585bf215546Sopenharmony_ci filled_stages |= (1 << MESA_SHADER_FRAGMENT); 3586bf215546Sopenharmony_ci } 3587bf215546Sopenharmony_ci 3588bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX9 && 3589bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].nir) { 3590bf215546Sopenharmony_ci struct nir_shader *combined_nir[] = {stages[MESA_SHADER_VERTEX].nir, stages[MESA_SHADER_TESS_CTRL].nir}; 3591bf215546Sopenharmony_ci 3592bf215546Sopenharmony_ci radv_nir_shader_info_init(&stages[MESA_SHADER_TESS_CTRL].info); 3593bf215546Sopenharmony_ci 3594bf215546Sopenharmony_ci /* Copy data to merged stage. 
*/ 3595bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.vs.as_ls = true; 3596bf215546Sopenharmony_ci 3597bf215546Sopenharmony_ci for (int i = 0; i < 2; i++) { 3598bf215546Sopenharmony_ci radv_nir_shader_info_pass(device, combined_nir[i], pipeline_layout, pipeline_key, 3599bf215546Sopenharmony_ci &stages[MESA_SHADER_TESS_CTRL].info); 3600bf215546Sopenharmony_ci } 3601bf215546Sopenharmony_ci 3602bf215546Sopenharmony_ci filled_stages |= (1 << MESA_SHADER_VERTEX); 3603bf215546Sopenharmony_ci filled_stages |= (1 << MESA_SHADER_TESS_CTRL); 3604bf215546Sopenharmony_ci } 3605bf215546Sopenharmony_ci 3606bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX9 && 3607bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].nir) { 3608bf215546Sopenharmony_ci gl_shader_stage pre_stage = 3609bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].nir ? MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; 3610bf215546Sopenharmony_ci struct nir_shader *combined_nir[] = {stages[pre_stage].nir, stages[MESA_SHADER_GEOMETRY].nir}; 3611bf215546Sopenharmony_ci 3612bf215546Sopenharmony_ci radv_nir_shader_info_init(&stages[MESA_SHADER_GEOMETRY].info); 3613bf215546Sopenharmony_ci 3614bf215546Sopenharmony_ci /* Copy data to merged stage. 
*/ 3615bf215546Sopenharmony_ci if (pre_stage == MESA_SHADER_VERTEX) { 3616bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].info.vs.as_es = stages[MESA_SHADER_VERTEX].info.vs.as_es; 3617bf215546Sopenharmony_ci } else { 3618bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].info.tes.as_es = stages[MESA_SHADER_TESS_EVAL].info.tes.as_es; 3619bf215546Sopenharmony_ci } 3620bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].info.is_ngg = stages[pre_stage].info.is_ngg; 3621bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].info.gs.es_type = pre_stage; 3622bf215546Sopenharmony_ci 3623bf215546Sopenharmony_ci for (int i = 0; i < 2; i++) { 3624bf215546Sopenharmony_ci radv_nir_shader_info_pass(device, combined_nir[i], pipeline_layout, pipeline_key, 3625bf215546Sopenharmony_ci &stages[MESA_SHADER_GEOMETRY].info); 3626bf215546Sopenharmony_ci } 3627bf215546Sopenharmony_ci 3628bf215546Sopenharmony_ci filled_stages |= (1 << pre_stage); 3629bf215546Sopenharmony_ci filled_stages |= (1 << MESA_SHADER_GEOMETRY); 3630bf215546Sopenharmony_ci } 3631bf215546Sopenharmony_ci 3632bf215546Sopenharmony_ci active_stages ^= filled_stages; 3633bf215546Sopenharmony_ci while (active_stages) { 3634bf215546Sopenharmony_ci int i = u_bit_scan(&active_stages); 3635bf215546Sopenharmony_ci radv_nir_shader_info_init(&stages[i].info); 3636bf215546Sopenharmony_ci radv_nir_shader_info_pass(device, stages[i].nir, pipeline_layout, pipeline_key, 3637bf215546Sopenharmony_ci &stages[i].info); 3638bf215546Sopenharmony_ci } 3639bf215546Sopenharmony_ci 3640bf215546Sopenharmony_ci if (stages[MESA_SHADER_COMPUTE].nir) { 3641bf215546Sopenharmony_ci unsigned subgroup_size = pipeline_key->cs.compute_subgroup_size; 3642bf215546Sopenharmony_ci unsigned req_subgroup_size = subgroup_size; 3643bf215546Sopenharmony_ci bool require_full_subgroups = pipeline_key->cs.require_full_subgroups; 3644bf215546Sopenharmony_ci 3645bf215546Sopenharmony_ci if (!subgroup_size) 3646bf215546Sopenharmony_ci subgroup_size = 
device->physical_device->cs_wave_size; 3647bf215546Sopenharmony_ci 3648bf215546Sopenharmony_ci unsigned local_size = stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size[0] * 3649bf215546Sopenharmony_ci stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size[1] * 3650bf215546Sopenharmony_ci stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size[2]; 3651bf215546Sopenharmony_ci 3652bf215546Sopenharmony_ci /* Games don't always request full subgroups when they should, 3653bf215546Sopenharmony_ci * which can cause bugs if cswave32 is enabled. 3654bf215546Sopenharmony_ci */ 3655bf215546Sopenharmony_ci if (device->physical_device->cs_wave_size == 32 && 3656bf215546Sopenharmony_ci stages[MESA_SHADER_COMPUTE].nir->info.cs.uses_wide_subgroup_intrinsics && !req_subgroup_size && 3657bf215546Sopenharmony_ci local_size % RADV_SUBGROUP_SIZE == 0) 3658bf215546Sopenharmony_ci require_full_subgroups = true; 3659bf215546Sopenharmony_ci 3660bf215546Sopenharmony_ci if (require_full_subgroups && !req_subgroup_size) { 3661bf215546Sopenharmony_ci /* don't use wave32 pretending to be wave64 */ 3662bf215546Sopenharmony_ci subgroup_size = RADV_SUBGROUP_SIZE; 3663bf215546Sopenharmony_ci } 3664bf215546Sopenharmony_ci 3665bf215546Sopenharmony_ci stages[MESA_SHADER_COMPUTE].info.cs.subgroup_size = subgroup_size; 3666bf215546Sopenharmony_ci } 3667bf215546Sopenharmony_ci 3668bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { 3669bf215546Sopenharmony_ci if (stages[i].nir) { 3670bf215546Sopenharmony_ci stages[i].info.wave_size = radv_get_wave_size(device, i, &stages[i].info); 3671bf215546Sopenharmony_ci stages[i].info.ballot_bit_size = radv_get_ballot_bit_size(device, i, &stages[i].info); 3672bf215546Sopenharmony_ci } 3673bf215546Sopenharmony_ci } 3674bf215546Sopenharmony_ci 3675bf215546Sopenharmony_ci /* PS always operates without workgroups. 
*/ 3676bf215546Sopenharmony_ci if (stages[MESA_SHADER_FRAGMENT].nir) 3677bf215546Sopenharmony_ci stages[MESA_SHADER_FRAGMENT].info.workgroup_size = stages[MESA_SHADER_FRAGMENT].info.wave_size; 3678bf215546Sopenharmony_ci 3679bf215546Sopenharmony_ci if (stages[MESA_SHADER_COMPUTE].nir) { 3680bf215546Sopenharmony_ci /* Variable workgroup size is not supported by Vulkan. */ 3681bf215546Sopenharmony_ci assert(!stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size_variable); 3682bf215546Sopenharmony_ci 3683bf215546Sopenharmony_ci stages[MESA_SHADER_COMPUTE].info.workgroup_size = 3684bf215546Sopenharmony_ci ac_compute_cs_workgroup_size( 3685bf215546Sopenharmony_ci stages[MESA_SHADER_COMPUTE].nir->info.workgroup_size, false, UINT32_MAX); 3686bf215546Sopenharmony_ci } 3687bf215546Sopenharmony_ci 3688bf215546Sopenharmony_ci if (stages[MESA_SHADER_TASK].nir) { 3689bf215546Sopenharmony_ci /* Task/mesh I/O uses the task ring buffers. */ 3690bf215546Sopenharmony_ci stages[MESA_SHADER_TASK].info.cs.uses_task_rings = true; 3691bf215546Sopenharmony_ci stages[MESA_SHADER_MESH].info.cs.uses_task_rings = true; 3692bf215546Sopenharmony_ci 3693bf215546Sopenharmony_ci stages[MESA_SHADER_TASK].info.workgroup_size = 3694bf215546Sopenharmony_ci ac_compute_cs_workgroup_size( 3695bf215546Sopenharmony_ci stages[MESA_SHADER_TASK].nir->info.workgroup_size, false, UINT32_MAX); 3696bf215546Sopenharmony_ci } 3697bf215546Sopenharmony_ci} 3698bf215546Sopenharmony_ci 3699bf215546Sopenharmony_cistatic void 3700bf215546Sopenharmony_ciradv_declare_pipeline_args(struct radv_device *device, struct radv_pipeline_stage *stages, 3701bf215546Sopenharmony_ci const struct radv_pipeline_key *pipeline_key) 3702bf215546Sopenharmony_ci{ 3703bf215546Sopenharmony_ci enum amd_gfx_level gfx_level = device->physical_device->rad_info.gfx_level; 3704bf215546Sopenharmony_ci unsigned active_stages = 0; 3705bf215546Sopenharmony_ci 3706bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { 
3707bf215546Sopenharmony_ci if (stages[i].nir) 3708bf215546Sopenharmony_ci active_stages |= (1 << i); 3709bf215546Sopenharmony_ci } 3710bf215546Sopenharmony_ci 3711bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 3712bf215546Sopenharmony_ci stages[i].args.is_gs_copy_shader = false; 3713bf215546Sopenharmony_ci stages[i].args.explicit_scratch_args = !radv_use_llvm_for_stage(device, i); 3714bf215546Sopenharmony_ci stages[i].args.remap_spi_ps_input = !radv_use_llvm_for_stage(device, i); 3715bf215546Sopenharmony_ci stages[i].args.load_grid_size_from_user_sgpr = device->load_grid_size_from_user_sgpr; 3716bf215546Sopenharmony_ci } 3717bf215546Sopenharmony_ci 3718bf215546Sopenharmony_ci if (gfx_level >= GFX9 && stages[MESA_SHADER_TESS_CTRL].nir) { 3719bf215546Sopenharmony_ci radv_declare_shader_args(gfx_level, pipeline_key, &stages[MESA_SHADER_TESS_CTRL].info, 3720bf215546Sopenharmony_ci MESA_SHADER_TESS_CTRL, true, MESA_SHADER_VERTEX, 3721bf215546Sopenharmony_ci &stages[MESA_SHADER_TESS_CTRL].args); 3722bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.user_sgprs_locs = stages[MESA_SHADER_TESS_CTRL].args.user_sgprs_locs; 3723bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.inline_push_constant_mask = 3724bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].args.ac.inline_push_const_mask; 3725bf215546Sopenharmony_ci 3726bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].args = stages[MESA_SHADER_TESS_CTRL].args; 3727bf215546Sopenharmony_ci active_stages &= ~(1 << MESA_SHADER_VERTEX); 3728bf215546Sopenharmony_ci active_stages &= ~(1 << MESA_SHADER_TESS_CTRL); 3729bf215546Sopenharmony_ci } 3730bf215546Sopenharmony_ci 3731bf215546Sopenharmony_ci if (gfx_level >= GFX9 && stages[MESA_SHADER_GEOMETRY].nir) { 3732bf215546Sopenharmony_ci gl_shader_stage pre_stage = 3733bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].nir ? 
MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; 3734bf215546Sopenharmony_ci radv_declare_shader_args(gfx_level, pipeline_key, &stages[MESA_SHADER_GEOMETRY].info, 3735bf215546Sopenharmony_ci MESA_SHADER_GEOMETRY, true, pre_stage, 3736bf215546Sopenharmony_ci &stages[MESA_SHADER_GEOMETRY].args); 3737bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].info.user_sgprs_locs = stages[MESA_SHADER_GEOMETRY].args.user_sgprs_locs; 3738bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].info.inline_push_constant_mask = 3739bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].args.ac.inline_push_const_mask; 3740bf215546Sopenharmony_ci 3741bf215546Sopenharmony_ci stages[pre_stage].args = stages[MESA_SHADER_GEOMETRY].args; 3742bf215546Sopenharmony_ci active_stages &= ~(1 << pre_stage); 3743bf215546Sopenharmony_ci active_stages &= ~(1 << MESA_SHADER_GEOMETRY); 3744bf215546Sopenharmony_ci } 3745bf215546Sopenharmony_ci 3746bf215546Sopenharmony_ci u_foreach_bit(i, active_stages) { 3747bf215546Sopenharmony_ci radv_declare_shader_args(gfx_level, pipeline_key, &stages[i].info, i, false, 3748bf215546Sopenharmony_ci MESA_SHADER_VERTEX, &stages[i].args); 3749bf215546Sopenharmony_ci stages[i].info.user_sgprs_locs = stages[i].args.user_sgprs_locs; 3750bf215546Sopenharmony_ci stages[i].info.inline_push_constant_mask = stages[i].args.ac.inline_push_const_mask; 3751bf215546Sopenharmony_ci } 3752bf215546Sopenharmony_ci} 3753bf215546Sopenharmony_ci 3754bf215546Sopenharmony_cistatic void 3755bf215546Sopenharmony_cimerge_tess_info(struct shader_info *tes_info, struct shader_info *tcs_info) 3756bf215546Sopenharmony_ci{ 3757bf215546Sopenharmony_ci /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says: 3758bf215546Sopenharmony_ci * 3759bf215546Sopenharmony_ci * "PointMode. Controls generation of points rather than triangles 3760bf215546Sopenharmony_ci * or lines. This functionality defaults to disabled, and is 3761bf215546Sopenharmony_ci * enabled if either shader stage includes the execution mode. 
3762bf215546Sopenharmony_ci * 3763bf215546Sopenharmony_ci * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw, 3764bf215546Sopenharmony_ci * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd, 3765bf215546Sopenharmony_ci * and OutputVertices, it says: 3766bf215546Sopenharmony_ci * 3767bf215546Sopenharmony_ci * "One mode must be set in at least one of the tessellation 3768bf215546Sopenharmony_ci * shader stages." 3769bf215546Sopenharmony_ci * 3770bf215546Sopenharmony_ci * So, the fields can be set in either the TCS or TES, but they must 3771bf215546Sopenharmony_ci * agree if set in both. Our backend looks at TES, so bitwise-or in 3772bf215546Sopenharmony_ci * the values from the TCS. 3773bf215546Sopenharmony_ci */ 3774bf215546Sopenharmony_ci assert(tcs_info->tess.tcs_vertices_out == 0 || tes_info->tess.tcs_vertices_out == 0 || 3775bf215546Sopenharmony_ci tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out); 3776bf215546Sopenharmony_ci tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out; 3777bf215546Sopenharmony_ci 3778bf215546Sopenharmony_ci assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || 3779bf215546Sopenharmony_ci tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED || 3780bf215546Sopenharmony_ci tcs_info->tess.spacing == tes_info->tess.spacing); 3781bf215546Sopenharmony_ci tes_info->tess.spacing |= tcs_info->tess.spacing; 3782bf215546Sopenharmony_ci 3783bf215546Sopenharmony_ci assert(tcs_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED || 3784bf215546Sopenharmony_ci tes_info->tess._primitive_mode == TESS_PRIMITIVE_UNSPECIFIED || 3785bf215546Sopenharmony_ci tcs_info->tess._primitive_mode == tes_info->tess._primitive_mode); 3786bf215546Sopenharmony_ci tes_info->tess._primitive_mode |= tcs_info->tess._primitive_mode; 3787bf215546Sopenharmony_ci tes_info->tess.ccw |= tcs_info->tess.ccw; 3788bf215546Sopenharmony_ci tes_info->tess.point_mode |= tcs_info->tess.point_mode; 
3789bf215546Sopenharmony_ci 3790bf215546Sopenharmony_ci /* Copy the merged info back to the TCS */ 3791bf215546Sopenharmony_ci tcs_info->tess.tcs_vertices_out = tes_info->tess.tcs_vertices_out; 3792bf215546Sopenharmony_ci tcs_info->tess.spacing = tes_info->tess.spacing; 3793bf215546Sopenharmony_ci tcs_info->tess._primitive_mode = tes_info->tess._primitive_mode; 3794bf215546Sopenharmony_ci tcs_info->tess.ccw = tes_info->tess.ccw; 3795bf215546Sopenharmony_ci tcs_info->tess.point_mode = tes_info->tess.point_mode; 3796bf215546Sopenharmony_ci} 3797bf215546Sopenharmony_ci 3798bf215546Sopenharmony_cistatic void 3799bf215546Sopenharmony_cigather_tess_info(struct radv_device *device, struct radv_pipeline_stage *stages, 3800bf215546Sopenharmony_ci const struct radv_pipeline_key *pipeline_key) 3801bf215546Sopenharmony_ci{ 3802bf215546Sopenharmony_ci merge_tess_info(&stages[MESA_SHADER_TESS_EVAL].nir->info, 3803bf215546Sopenharmony_ci &stages[MESA_SHADER_TESS_CTRL].nir->info); 3804bf215546Sopenharmony_ci 3805bf215546Sopenharmony_ci unsigned tess_in_patch_size = pipeline_key->tcs.tess_input_vertices; 3806bf215546Sopenharmony_ci unsigned tess_out_patch_size = stages[MESA_SHADER_TESS_CTRL].nir->info.tess.tcs_vertices_out; 3807bf215546Sopenharmony_ci 3808bf215546Sopenharmony_ci /* Number of tessellation patches per workgroup processed by the current pipeline. 
*/ 3809bf215546Sopenharmony_ci unsigned num_patches = get_tcs_num_patches( 3810bf215546Sopenharmony_ci tess_in_patch_size, tess_out_patch_size, 3811bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs, 3812bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs, 3813bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs, 3814bf215546Sopenharmony_ci device->physical_device->hs.tess_offchip_block_dw_size, device->physical_device->rad_info.gfx_level, 3815bf215546Sopenharmony_ci device->physical_device->rad_info.family); 3816bf215546Sopenharmony_ci 3817bf215546Sopenharmony_ci /* LDS size used by VS+TCS for storing TCS inputs and outputs. */ 3818bf215546Sopenharmony_ci unsigned tcs_lds_size = calculate_tess_lds_size( 3819bf215546Sopenharmony_ci device->physical_device->rad_info.gfx_level, tess_in_patch_size, tess_out_patch_size, 3820bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_inputs, num_patches, 3821bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_outputs, 3822bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_linked_patch_outputs); 3823bf215546Sopenharmony_ci 3824bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.num_tess_patches = num_patches; 3825bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.num_lds_blocks = tcs_lds_size; 3826bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.tes_reads_tess_factors = 3827bf215546Sopenharmony_ci !!(stages[MESA_SHADER_TESS_EVAL].nir->info.inputs_read & 3828bf215546Sopenharmony_ci (VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER)); 3829bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.tes_inputs_read = stages[MESA_SHADER_TESS_EVAL].nir->info.inputs_read; 3830bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.tes_patch_inputs_read = 3831bf215546Sopenharmony_ci 
stages[MESA_SHADER_TESS_EVAL].nir->info.patch_inputs_read; 3832bf215546Sopenharmony_ci 3833bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].info.num_tess_patches = num_patches; 3834bf215546Sopenharmony_ci stages[MESA_SHADER_GEOMETRY].info.num_tess_patches = num_patches; 3835bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.num_tess_patches = num_patches; 3836bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.tcs.tcs_vertices_out = tess_out_patch_size; 3837bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.tcs.tcs_vertices_out = tess_out_patch_size; 3838bf215546Sopenharmony_ci 3839bf215546Sopenharmony_ci if (!radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX)) { 3840bf215546Sopenharmony_ci /* When the number of TCS input and output vertices are the same (typically 3): 3841bf215546Sopenharmony_ci * - There is an equal amount of LS and HS invocations 3842bf215546Sopenharmony_ci * - In case of merged LSHS shaders, the LS and HS halves of the shader 3843bf215546Sopenharmony_ci * always process the exact same vertex. We can use this knowledge to optimize them. 3844bf215546Sopenharmony_ci * 3845bf215546Sopenharmony_ci * We don't set tcs_in_out_eq if the float controls differ because that might 3846bf215546Sopenharmony_ci * involve different float modes for the same block and our optimizer 3847bf215546Sopenharmony_ci * doesn't handle a instruction dominating another with a different mode. 
3848bf215546Sopenharmony_ci */ 3849bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.vs.tcs_in_out_eq = 3850bf215546Sopenharmony_ci device->physical_device->rad_info.gfx_level >= GFX9 && 3851bf215546Sopenharmony_ci tess_in_patch_size == tess_out_patch_size && 3852bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].nir->info.float_controls_execution_mode == 3853bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].nir->info.float_controls_execution_mode; 3854bf215546Sopenharmony_ci 3855bf215546Sopenharmony_ci if (stages[MESA_SHADER_VERTEX].info.vs.tcs_in_out_eq) 3856bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.vs.tcs_temp_only_input_mask = 3857bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].nir->info.inputs_read & 3858bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].nir->info.outputs_written & 3859bf215546Sopenharmony_ci ~stages[MESA_SHADER_TESS_CTRL].nir->info.tess.tcs_cross_invocation_inputs_read & 3860bf215546Sopenharmony_ci ~stages[MESA_SHADER_TESS_CTRL].nir->info.inputs_read_indirectly & 3861bf215546Sopenharmony_ci ~stages[MESA_SHADER_VERTEX].nir->info.outputs_accessed_indirectly; 3862bf215546Sopenharmony_ci 3863bf215546Sopenharmony_ci /* Copy data to TCS so it can be accessed by the backend if they are merged. 
*/ 3864bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.vs.tcs_in_out_eq = stages[MESA_SHADER_VERTEX].info.vs.tcs_in_out_eq; 3865bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].info.vs.tcs_temp_only_input_mask = 3866bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].info.vs.tcs_temp_only_input_mask; 3867bf215546Sopenharmony_ci } 3868bf215546Sopenharmony_ci 3869bf215546Sopenharmony_ci for (gl_shader_stage s = MESA_SHADER_VERTEX; s <= MESA_SHADER_TESS_CTRL; ++s) 3870bf215546Sopenharmony_ci stages[s].info.workgroup_size = 3871bf215546Sopenharmony_ci ac_compute_lshs_workgroup_size(device->physical_device->rad_info.gfx_level, s, num_patches, 3872bf215546Sopenharmony_ci tess_in_patch_size, tess_out_patch_size); 3873bf215546Sopenharmony_ci} 3874bf215546Sopenharmony_ci 3875bf215546Sopenharmony_cistatic bool 3876bf215546Sopenharmony_cimem_vectorize_callback(unsigned align_mul, unsigned align_offset, unsigned bit_size, 3877bf215546Sopenharmony_ci unsigned num_components, nir_intrinsic_instr *low, nir_intrinsic_instr *high, 3878bf215546Sopenharmony_ci void *data) 3879bf215546Sopenharmony_ci{ 3880bf215546Sopenharmony_ci if (num_components > 4) 3881bf215546Sopenharmony_ci return false; 3882bf215546Sopenharmony_ci 3883bf215546Sopenharmony_ci /* >128 bit loads are split except with SMEM */ 3884bf215546Sopenharmony_ci if (bit_size * num_components > 128) 3885bf215546Sopenharmony_ci return false; 3886bf215546Sopenharmony_ci 3887bf215546Sopenharmony_ci uint32_t align; 3888bf215546Sopenharmony_ci if (align_offset) 3889bf215546Sopenharmony_ci align = 1 << (ffs(align_offset) - 1); 3890bf215546Sopenharmony_ci else 3891bf215546Sopenharmony_ci align = align_mul; 3892bf215546Sopenharmony_ci 3893bf215546Sopenharmony_ci switch (low->intrinsic) { 3894bf215546Sopenharmony_ci case nir_intrinsic_load_global: 3895bf215546Sopenharmony_ci case nir_intrinsic_store_global: 3896bf215546Sopenharmony_ci case nir_intrinsic_store_ssbo: 3897bf215546Sopenharmony_ci case 
nir_intrinsic_load_ssbo: 3898bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 3899bf215546Sopenharmony_ci case nir_intrinsic_load_push_constant: { 3900bf215546Sopenharmony_ci unsigned max_components; 3901bf215546Sopenharmony_ci if (align % 4 == 0) 3902bf215546Sopenharmony_ci max_components = NIR_MAX_VEC_COMPONENTS; 3903bf215546Sopenharmony_ci else if (align % 2 == 0) 3904bf215546Sopenharmony_ci max_components = 16u / bit_size; 3905bf215546Sopenharmony_ci else 3906bf215546Sopenharmony_ci max_components = 8u / bit_size; 3907bf215546Sopenharmony_ci return (align % (bit_size / 8u)) == 0 && num_components <= max_components; 3908bf215546Sopenharmony_ci } 3909bf215546Sopenharmony_ci case nir_intrinsic_load_deref: 3910bf215546Sopenharmony_ci case nir_intrinsic_store_deref: 3911bf215546Sopenharmony_ci assert(nir_deref_mode_is(nir_src_as_deref(low->src[0]), nir_var_mem_shared)); 3912bf215546Sopenharmony_ci FALLTHROUGH; 3913bf215546Sopenharmony_ci case nir_intrinsic_load_shared: 3914bf215546Sopenharmony_ci case nir_intrinsic_store_shared: 3915bf215546Sopenharmony_ci if (bit_size * num_components == 3916bf215546Sopenharmony_ci 96) { /* 96 bit loads require 128 bit alignment and are split otherwise */ 3917bf215546Sopenharmony_ci return align % 16 == 0; 3918bf215546Sopenharmony_ci } else if (bit_size == 16 && (align % 4)) { 3919bf215546Sopenharmony_ci /* AMD hardware can't do 2-byte aligned f16vec2 loads, but they are useful for ALU 3920bf215546Sopenharmony_ci * vectorization, because our vectorizer requires the scalar IR to already contain vectors. 3921bf215546Sopenharmony_ci */ 3922bf215546Sopenharmony_ci return (align % 2 == 0) && num_components <= 2; 3923bf215546Sopenharmony_ci } else { 3924bf215546Sopenharmony_ci if (num_components == 3) { 3925bf215546Sopenharmony_ci /* AMD hardware can't do 3-component loads except for 96-bit loads, handled above. 
*/ 3926bf215546Sopenharmony_ci return false; 3927bf215546Sopenharmony_ci } 3928bf215546Sopenharmony_ci unsigned req = bit_size * num_components; 3929bf215546Sopenharmony_ci if (req == 64 || req == 128) /* 64-bit and 128-bit loads can use ds_read2_b{32,64} */ 3930bf215546Sopenharmony_ci req /= 2u; 3931bf215546Sopenharmony_ci return align % (req / 8u) == 0; 3932bf215546Sopenharmony_ci } 3933bf215546Sopenharmony_ci default: 3934bf215546Sopenharmony_ci return false; 3935bf215546Sopenharmony_ci } 3936bf215546Sopenharmony_ci return false; 3937bf215546Sopenharmony_ci} 3938bf215546Sopenharmony_ci 3939bf215546Sopenharmony_cistatic unsigned 3940bf215546Sopenharmony_cilower_bit_size_callback(const nir_instr *instr, void *_) 3941bf215546Sopenharmony_ci{ 3942bf215546Sopenharmony_ci struct radv_device *device = _; 3943bf215546Sopenharmony_ci enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level; 3944bf215546Sopenharmony_ci 3945bf215546Sopenharmony_ci if (instr->type != nir_instr_type_alu) 3946bf215546Sopenharmony_ci return 0; 3947bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(instr); 3948bf215546Sopenharmony_ci 3949bf215546Sopenharmony_ci /* If an instruction is not scalarized by this point, 3950bf215546Sopenharmony_ci * it can be emitted as packed instruction */ 3951bf215546Sopenharmony_ci if (alu->dest.dest.ssa.num_components > 1) 3952bf215546Sopenharmony_ci return 0; 3953bf215546Sopenharmony_ci 3954bf215546Sopenharmony_ci if (alu->dest.dest.ssa.bit_size & (8 | 16)) { 3955bf215546Sopenharmony_ci unsigned bit_size = alu->dest.dest.ssa.bit_size; 3956bf215546Sopenharmony_ci switch (alu->op) { 3957bf215546Sopenharmony_ci case nir_op_bitfield_select: 3958bf215546Sopenharmony_ci case nir_op_imul_high: 3959bf215546Sopenharmony_ci case nir_op_umul_high: 3960bf215546Sopenharmony_ci return 32; 3961bf215546Sopenharmony_ci case nir_op_iabs: 3962bf215546Sopenharmony_ci case nir_op_imax: 3963bf215546Sopenharmony_ci case nir_op_umax: 3964bf215546Sopenharmony_ci 
case nir_op_imin: 3965bf215546Sopenharmony_ci case nir_op_umin: 3966bf215546Sopenharmony_ci case nir_op_ishr: 3967bf215546Sopenharmony_ci case nir_op_ushr: 3968bf215546Sopenharmony_ci case nir_op_ishl: 3969bf215546Sopenharmony_ci case nir_op_isign: 3970bf215546Sopenharmony_ci case nir_op_uadd_sat: 3971bf215546Sopenharmony_ci case nir_op_usub_sat: 3972bf215546Sopenharmony_ci return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 32 3973bf215546Sopenharmony_ci : 0; 3974bf215546Sopenharmony_ci case nir_op_iadd_sat: 3975bf215546Sopenharmony_ci case nir_op_isub_sat: 3976bf215546Sopenharmony_ci return bit_size == 8 || !nir_dest_is_divergent(alu->dest.dest) ? 32 : 0; 3977bf215546Sopenharmony_ci 3978bf215546Sopenharmony_ci default: 3979bf215546Sopenharmony_ci return 0; 3980bf215546Sopenharmony_ci } 3981bf215546Sopenharmony_ci } 3982bf215546Sopenharmony_ci 3983bf215546Sopenharmony_ci if (nir_src_bit_size(alu->src[0].src) & (8 | 16)) { 3984bf215546Sopenharmony_ci unsigned bit_size = nir_src_bit_size(alu->src[0].src); 3985bf215546Sopenharmony_ci switch (alu->op) { 3986bf215546Sopenharmony_ci case nir_op_bit_count: 3987bf215546Sopenharmony_ci case nir_op_find_lsb: 3988bf215546Sopenharmony_ci case nir_op_ufind_msb: 3989bf215546Sopenharmony_ci case nir_op_i2b1: 3990bf215546Sopenharmony_ci return 32; 3991bf215546Sopenharmony_ci case nir_op_ilt: 3992bf215546Sopenharmony_ci case nir_op_ige: 3993bf215546Sopenharmony_ci case nir_op_ieq: 3994bf215546Sopenharmony_ci case nir_op_ine: 3995bf215546Sopenharmony_ci case nir_op_ult: 3996bf215546Sopenharmony_ci case nir_op_uge: 3997bf215546Sopenharmony_ci return (bit_size == 8 || !(chip >= GFX8 && nir_dest_is_divergent(alu->dest.dest))) ? 
32 3998bf215546Sopenharmony_ci : 0; 3999bf215546Sopenharmony_ci default: 4000bf215546Sopenharmony_ci return 0; 4001bf215546Sopenharmony_ci } 4002bf215546Sopenharmony_ci } 4003bf215546Sopenharmony_ci 4004bf215546Sopenharmony_ci return 0; 4005bf215546Sopenharmony_ci} 4006bf215546Sopenharmony_ci 4007bf215546Sopenharmony_cistatic uint8_t 4008bf215546Sopenharmony_ciopt_vectorize_callback(const nir_instr *instr, const void *_) 4009bf215546Sopenharmony_ci{ 4010bf215546Sopenharmony_ci if (instr->type != nir_instr_type_alu) 4011bf215546Sopenharmony_ci return 0; 4012bf215546Sopenharmony_ci 4013bf215546Sopenharmony_ci const struct radv_device *device = _; 4014bf215546Sopenharmony_ci enum amd_gfx_level chip = device->physical_device->rad_info.gfx_level; 4015bf215546Sopenharmony_ci if (chip < GFX9) 4016bf215546Sopenharmony_ci return 1; 4017bf215546Sopenharmony_ci 4018bf215546Sopenharmony_ci const nir_alu_instr *alu = nir_instr_as_alu(instr); 4019bf215546Sopenharmony_ci const unsigned bit_size = alu->dest.dest.ssa.bit_size; 4020bf215546Sopenharmony_ci if (bit_size != 16) 4021bf215546Sopenharmony_ci return 1; 4022bf215546Sopenharmony_ci 4023bf215546Sopenharmony_ci switch (alu->op) { 4024bf215546Sopenharmony_ci case nir_op_fadd: 4025bf215546Sopenharmony_ci case nir_op_fsub: 4026bf215546Sopenharmony_ci case nir_op_fmul: 4027bf215546Sopenharmony_ci case nir_op_ffma: 4028bf215546Sopenharmony_ci case nir_op_fdiv: 4029bf215546Sopenharmony_ci case nir_op_flrp: 4030bf215546Sopenharmony_ci case nir_op_fabs: 4031bf215546Sopenharmony_ci case nir_op_fneg: 4032bf215546Sopenharmony_ci case nir_op_fsat: 4033bf215546Sopenharmony_ci case nir_op_fmin: 4034bf215546Sopenharmony_ci case nir_op_fmax: 4035bf215546Sopenharmony_ci case nir_op_iabs: 4036bf215546Sopenharmony_ci case nir_op_iadd: 4037bf215546Sopenharmony_ci case nir_op_iadd_sat: 4038bf215546Sopenharmony_ci case nir_op_uadd_sat: 4039bf215546Sopenharmony_ci case nir_op_isub: 4040bf215546Sopenharmony_ci case nir_op_isub_sat: 
4041bf215546Sopenharmony_ci case nir_op_usub_sat: 4042bf215546Sopenharmony_ci case nir_op_ineg: 4043bf215546Sopenharmony_ci case nir_op_imul: 4044bf215546Sopenharmony_ci case nir_op_imin: 4045bf215546Sopenharmony_ci case nir_op_imax: 4046bf215546Sopenharmony_ci case nir_op_umin: 4047bf215546Sopenharmony_ci case nir_op_umax: 4048bf215546Sopenharmony_ci return 2; 4049bf215546Sopenharmony_ci case nir_op_ishl: /* TODO: in NIR, these have 32bit shift operands */ 4050bf215546Sopenharmony_ci case nir_op_ishr: /* while Radeon needs 16bit operands when vectorized */ 4051bf215546Sopenharmony_ci case nir_op_ushr: 4052bf215546Sopenharmony_ci default: 4053bf215546Sopenharmony_ci return 1; 4054bf215546Sopenharmony_ci } 4055bf215546Sopenharmony_ci} 4056bf215546Sopenharmony_ci 4057bf215546Sopenharmony_cistatic nir_component_mask_t 4058bf215546Sopenharmony_cinon_uniform_access_callback(const nir_src *src, void *_) 4059bf215546Sopenharmony_ci{ 4060bf215546Sopenharmony_ci if (src->ssa->num_components == 1) 4061bf215546Sopenharmony_ci return 0x1; 4062bf215546Sopenharmony_ci return nir_chase_binding(*src).success ? 0x2 : 0x3; 4063bf215546Sopenharmony_ci} 4064bf215546Sopenharmony_ci 4065bf215546Sopenharmony_ci 4066bf215546Sopenharmony_ciVkResult 4067bf215546Sopenharmony_ciradv_upload_shaders(struct radv_device *device, struct radv_pipeline *pipeline, 4068bf215546Sopenharmony_ci struct radv_shader_binary **binaries, struct radv_shader_binary *gs_copy_binary) 4069bf215546Sopenharmony_ci{ 4070bf215546Sopenharmony_ci uint32_t code_size = 0; 4071bf215546Sopenharmony_ci 4072bf215546Sopenharmony_ci /* Compute the total code size. 
*/ 4073bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { 4074bf215546Sopenharmony_ci struct radv_shader *shader = pipeline->shaders[i]; 4075bf215546Sopenharmony_ci if (!shader) 4076bf215546Sopenharmony_ci continue; 4077bf215546Sopenharmony_ci 4078bf215546Sopenharmony_ci code_size += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT); 4079bf215546Sopenharmony_ci } 4080bf215546Sopenharmony_ci 4081bf215546Sopenharmony_ci if (pipeline->gs_copy_shader) { 4082bf215546Sopenharmony_ci code_size += align(pipeline->gs_copy_shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT); 4083bf215546Sopenharmony_ci } 4084bf215546Sopenharmony_ci 4085bf215546Sopenharmony_ci /* Allocate memory for all shader binaries. */ 4086bf215546Sopenharmony_ci pipeline->slab = radv_pipeline_slab_create(device, pipeline, code_size); 4087bf215546Sopenharmony_ci if (!pipeline->slab) 4088bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_DEVICE_MEMORY; 4089bf215546Sopenharmony_ci 4090bf215546Sopenharmony_ci pipeline->slab_bo = pipeline->slab->alloc->arena->bo; 4091bf215546Sopenharmony_ci 4092bf215546Sopenharmony_ci /* Upload shader binaries. 
*/ 4093bf215546Sopenharmony_ci uint64_t slab_va = radv_buffer_get_va(pipeline->slab_bo); 4094bf215546Sopenharmony_ci uint32_t slab_offset = pipeline->slab->alloc->offset; 4095bf215546Sopenharmony_ci char *slab_ptr = pipeline->slab->alloc->arena->ptr; 4096bf215546Sopenharmony_ci 4097bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 4098bf215546Sopenharmony_ci struct radv_shader *shader = pipeline->shaders[i]; 4099bf215546Sopenharmony_ci if (!shader) 4100bf215546Sopenharmony_ci continue; 4101bf215546Sopenharmony_ci 4102bf215546Sopenharmony_ci shader->va = slab_va + slab_offset; 4103bf215546Sopenharmony_ci 4104bf215546Sopenharmony_ci void *dest_ptr = slab_ptr + slab_offset; 4105bf215546Sopenharmony_ci if (!radv_shader_binary_upload(device, binaries[i], shader, dest_ptr)) 4106bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 4107bf215546Sopenharmony_ci 4108bf215546Sopenharmony_ci slab_offset += align(shader->code_size, RADV_SHADER_ALLOC_ALIGNMENT); 4109bf215546Sopenharmony_ci } 4110bf215546Sopenharmony_ci 4111bf215546Sopenharmony_ci if (pipeline->gs_copy_shader) { 4112bf215546Sopenharmony_ci pipeline->gs_copy_shader->va = slab_va + slab_offset; 4113bf215546Sopenharmony_ci 4114bf215546Sopenharmony_ci void *dest_ptr = slab_ptr + slab_offset; 4115bf215546Sopenharmony_ci if (!radv_shader_binary_upload(device, gs_copy_binary, pipeline->gs_copy_shader, dest_ptr)) 4116bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 4117bf215546Sopenharmony_ci } 4118bf215546Sopenharmony_ci 4119bf215546Sopenharmony_ci return VK_SUCCESS; 4120bf215546Sopenharmony_ci} 4121bf215546Sopenharmony_ci 4122bf215546Sopenharmony_cistatic bool 4123bf215546Sopenharmony_ciradv_consider_force_vrs(const struct radv_pipeline *pipeline, bool noop_fs, 4124bf215546Sopenharmony_ci const struct radv_pipeline_stage *stages, 4125bf215546Sopenharmony_ci gl_shader_stage last_vgt_api_stage) 4126bf215546Sopenharmony_ci{ 4127bf215546Sopenharmony_ci struct radv_device *device 
= pipeline->device; 4128bf215546Sopenharmony_ci 4129bf215546Sopenharmony_ci if (!device->force_vrs_enabled) 4130bf215546Sopenharmony_ci return false; 4131bf215546Sopenharmony_ci 4132bf215546Sopenharmony_ci if (last_vgt_api_stage != MESA_SHADER_VERTEX && 4133bf215546Sopenharmony_ci last_vgt_api_stage != MESA_SHADER_TESS_EVAL && 4134bf215546Sopenharmony_ci last_vgt_api_stage != MESA_SHADER_GEOMETRY) 4135bf215546Sopenharmony_ci return false; 4136bf215546Sopenharmony_ci 4137bf215546Sopenharmony_ci nir_shader *last_vgt_shader = stages[last_vgt_api_stage].nir; 4138bf215546Sopenharmony_ci if (last_vgt_shader->info.outputs_written & BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_SHADING_RATE)) 4139bf215546Sopenharmony_ci return false; 4140bf215546Sopenharmony_ci 4141bf215546Sopenharmony_ci /* VRS has no effect if there is no pixel shader. */ 4142bf215546Sopenharmony_ci if (noop_fs) 4143bf215546Sopenharmony_ci return false; 4144bf215546Sopenharmony_ci 4145bf215546Sopenharmony_ci /* Do not enable if the PS uses gl_FragCoord because it breaks postprocessing in some games. 
*/ 4146bf215546Sopenharmony_ci nir_shader *fs_shader = stages[MESA_SHADER_FRAGMENT].nir; 4147bf215546Sopenharmony_ci if (fs_shader && 4148bf215546Sopenharmony_ci BITSET_TEST(fs_shader->info.system_values_read, SYSTEM_VALUE_FRAG_COORD)) { 4149bf215546Sopenharmony_ci return false; 4150bf215546Sopenharmony_ci } 4151bf215546Sopenharmony_ci 4152bf215546Sopenharmony_ci return true; 4153bf215546Sopenharmony_ci} 4154bf215546Sopenharmony_ci 4155bf215546Sopenharmony_cistatic nir_ssa_def * 4156bf215546Sopenharmony_ciradv_adjust_vertex_fetch_alpha(nir_builder *b, 4157bf215546Sopenharmony_ci enum radv_vs_input_alpha_adjust alpha_adjust, 4158bf215546Sopenharmony_ci nir_ssa_def *alpha) 4159bf215546Sopenharmony_ci{ 4160bf215546Sopenharmony_ci if (alpha_adjust == ALPHA_ADJUST_SSCALED) 4161bf215546Sopenharmony_ci alpha = nir_f2u32(b, alpha); 4162bf215546Sopenharmony_ci 4163bf215546Sopenharmony_ci /* For the integer-like cases, do a natural sign extension. 4164bf215546Sopenharmony_ci * 4165bf215546Sopenharmony_ci * For the SNORM case, the values are 0.0, 0.333, 0.666, 1.0 and happen to contain 0, 1, 2, 3 as 4166bf215546Sopenharmony_ci * the two LSBs of the exponent. 4167bf215546Sopenharmony_ci */ 4168bf215546Sopenharmony_ci unsigned offset = alpha_adjust == ALPHA_ADJUST_SNORM ? 23u : 0u; 4169bf215546Sopenharmony_ci 4170bf215546Sopenharmony_ci alpha = nir_ibfe_imm(b, alpha, offset, 2u); 4171bf215546Sopenharmony_ci 4172bf215546Sopenharmony_ci /* Convert back to the right type. 
*/ 4173bf215546Sopenharmony_ci if (alpha_adjust == ALPHA_ADJUST_SNORM) { 4174bf215546Sopenharmony_ci alpha = nir_i2f32(b, alpha); 4175bf215546Sopenharmony_ci alpha = nir_fmax(b, alpha, nir_imm_float(b, -1.0f)); 4176bf215546Sopenharmony_ci } else if (alpha_adjust == ALPHA_ADJUST_SSCALED) { 4177bf215546Sopenharmony_ci alpha = nir_i2f32(b, alpha); 4178bf215546Sopenharmony_ci } 4179bf215546Sopenharmony_ci 4180bf215546Sopenharmony_ci return alpha; 4181bf215546Sopenharmony_ci} 4182bf215546Sopenharmony_ci 4183bf215546Sopenharmony_cistatic bool 4184bf215546Sopenharmony_ciradv_lower_vs_input(nir_shader *nir, const struct radv_pipeline_key *pipeline_key) 4185bf215546Sopenharmony_ci{ 4186bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 4187bf215546Sopenharmony_ci bool progress = false; 4188bf215546Sopenharmony_ci 4189bf215546Sopenharmony_ci if (pipeline_key->vs.dynamic_input_state) 4190bf215546Sopenharmony_ci return false; 4191bf215546Sopenharmony_ci 4192bf215546Sopenharmony_ci nir_builder b; 4193bf215546Sopenharmony_ci nir_builder_init(&b, impl); 4194bf215546Sopenharmony_ci 4195bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 4196bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 4197bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 4198bf215546Sopenharmony_ci continue; 4199bf215546Sopenharmony_ci 4200bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 4201bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_load_input) 4202bf215546Sopenharmony_ci continue; 4203bf215546Sopenharmony_ci 4204bf215546Sopenharmony_ci unsigned location = nir_intrinsic_base(intrin) - VERT_ATTRIB_GENERIC0; 4205bf215546Sopenharmony_ci enum radv_vs_input_alpha_adjust alpha_adjust = pipeline_key->vs.vertex_alpha_adjust[location]; 4206bf215546Sopenharmony_ci bool post_shuffle = pipeline_key->vs.vertex_post_shuffle & (1 << location); 4207bf215546Sopenharmony_ci 4208bf215546Sopenharmony_ci 
unsigned component = nir_intrinsic_component(intrin); 4209bf215546Sopenharmony_ci unsigned num_components = intrin->dest.ssa.num_components; 4210bf215546Sopenharmony_ci 4211bf215546Sopenharmony_ci unsigned attrib_format = pipeline_key->vs.vertex_attribute_formats[location]; 4212bf215546Sopenharmony_ci unsigned dfmt = attrib_format & 0xf; 4213bf215546Sopenharmony_ci unsigned nfmt = (attrib_format >> 4) & 0x7; 4214bf215546Sopenharmony_ci const struct ac_data_format_info *vtx_info = ac_get_data_format_info(dfmt); 4215bf215546Sopenharmony_ci bool is_float = 4216bf215546Sopenharmony_ci nfmt != V_008F0C_BUF_NUM_FORMAT_UINT && nfmt != V_008F0C_BUF_NUM_FORMAT_SINT; 4217bf215546Sopenharmony_ci 4218bf215546Sopenharmony_ci unsigned mask = nir_ssa_def_components_read(&intrin->dest.ssa) << component; 4219bf215546Sopenharmony_ci unsigned num_channels = MIN2(util_last_bit(mask), vtx_info->num_channels); 4220bf215546Sopenharmony_ci 4221bf215546Sopenharmony_ci static const unsigned swizzle_normal[4] = {0, 1, 2, 3}; 4222bf215546Sopenharmony_ci static const unsigned swizzle_post_shuffle[4] = {2, 1, 0, 3}; 4223bf215546Sopenharmony_ci const unsigned *swizzle = post_shuffle ? swizzle_post_shuffle : swizzle_normal; 4224bf215546Sopenharmony_ci 4225bf215546Sopenharmony_ci b.cursor = nir_after_instr(instr); 4226bf215546Sopenharmony_ci nir_ssa_def *channels[4]; 4227bf215546Sopenharmony_ci 4228bf215546Sopenharmony_ci if (post_shuffle) { 4229bf215546Sopenharmony_ci /* Expand to load 3 components because it's shuffled like X<->Z. 
*/ 4230bf215546Sopenharmony_ci intrin->num_components = MAX2(component + num_components, 3); 4231bf215546Sopenharmony_ci intrin->dest.ssa.num_components = intrin->num_components; 4232bf215546Sopenharmony_ci 4233bf215546Sopenharmony_ci nir_intrinsic_set_component(intrin, 0); 4234bf215546Sopenharmony_ci 4235bf215546Sopenharmony_ci num_channels = MAX2(num_channels, 3); 4236bf215546Sopenharmony_ci } 4237bf215546Sopenharmony_ci 4238bf215546Sopenharmony_ci for (uint32_t i = 0; i < num_components; i++) { 4239bf215546Sopenharmony_ci unsigned idx = i + (post_shuffle ? component : 0); 4240bf215546Sopenharmony_ci 4241bf215546Sopenharmony_ci if (swizzle[i + component] < num_channels) { 4242bf215546Sopenharmony_ci channels[i] = nir_channel(&b, &intrin->dest.ssa, swizzle[idx]); 4243bf215546Sopenharmony_ci } else if (i + component == 3) { 4244bf215546Sopenharmony_ci channels[i] = is_float ? nir_imm_floatN_t(&b, 1.0f, intrin->dest.ssa.bit_size) 4245bf215546Sopenharmony_ci : nir_imm_intN_t(&b, 1u, intrin->dest.ssa.bit_size); 4246bf215546Sopenharmony_ci } else { 4247bf215546Sopenharmony_ci channels[i] = nir_imm_zero(&b, 1, intrin->dest.ssa.bit_size); 4248bf215546Sopenharmony_ci } 4249bf215546Sopenharmony_ci } 4250bf215546Sopenharmony_ci 4251bf215546Sopenharmony_ci if (alpha_adjust != ALPHA_ADJUST_NONE && component + num_components == 4) { 4252bf215546Sopenharmony_ci unsigned idx = num_components - 1; 4253bf215546Sopenharmony_ci channels[idx] = radv_adjust_vertex_fetch_alpha(&b, alpha_adjust, channels[idx]); 4254bf215546Sopenharmony_ci } 4255bf215546Sopenharmony_ci 4256bf215546Sopenharmony_ci nir_ssa_def *new_dest = nir_vec(&b, channels, num_components); 4257bf215546Sopenharmony_ci 4258bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, new_dest, 4259bf215546Sopenharmony_ci new_dest->parent_instr); 4260bf215546Sopenharmony_ci 4261bf215546Sopenharmony_ci progress = true; 4262bf215546Sopenharmony_ci } 4263bf215546Sopenharmony_ci } 4264bf215546Sopenharmony_ci 
4265bf215546Sopenharmony_ci if (progress) 4266bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); 4267bf215546Sopenharmony_ci else 4268bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 4269bf215546Sopenharmony_ci 4270bf215546Sopenharmony_ci return progress; 4271bf215546Sopenharmony_ci} 4272bf215546Sopenharmony_ci 4273bf215546Sopenharmony_cistatic bool 4274bf215546Sopenharmony_ciradv_lower_fs_output(nir_shader *nir, const struct radv_pipeline_key *pipeline_key) 4275bf215546Sopenharmony_ci{ 4276bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(nir); 4277bf215546Sopenharmony_ci bool progress = false; 4278bf215546Sopenharmony_ci 4279bf215546Sopenharmony_ci nir_builder b; 4280bf215546Sopenharmony_ci nir_builder_init(&b, impl); 4281bf215546Sopenharmony_ci 4282bf215546Sopenharmony_ci nir_foreach_block(block, impl) { 4283bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 4284bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 4285bf215546Sopenharmony_ci continue; 4286bf215546Sopenharmony_ci 4287bf215546Sopenharmony_ci nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 4288bf215546Sopenharmony_ci if (intrin->intrinsic != nir_intrinsic_store_output) 4289bf215546Sopenharmony_ci continue; 4290bf215546Sopenharmony_ci 4291bf215546Sopenharmony_ci int slot = nir_intrinsic_base(intrin) - FRAG_RESULT_DATA0; 4292bf215546Sopenharmony_ci if (slot < 0) 4293bf215546Sopenharmony_ci continue; 4294bf215546Sopenharmony_ci 4295bf215546Sopenharmony_ci unsigned write_mask = nir_intrinsic_write_mask(intrin); 4296bf215546Sopenharmony_ci unsigned col_format = (pipeline_key->ps.col_format >> (4 * slot)) & 0xf; 4297bf215546Sopenharmony_ci bool is_int8 = (pipeline_key->ps.is_int8 >> slot) & 1; 4298bf215546Sopenharmony_ci bool is_int10 = (pipeline_key->ps.is_int10 >> slot) & 1; 4299bf215546Sopenharmony_ci bool enable_mrt_output_nan_fixup = 
(pipeline_key->ps.enable_mrt_output_nan_fixup >> slot) & 1; 4300bf215546Sopenharmony_ci bool is_16bit = intrin->src[0].ssa->bit_size == 16; 4301bf215546Sopenharmony_ci 4302bf215546Sopenharmony_ci if (col_format == V_028714_SPI_SHADER_ZERO) 4303bf215546Sopenharmony_ci continue; 4304bf215546Sopenharmony_ci 4305bf215546Sopenharmony_ci b.cursor = nir_before_instr(instr); 4306bf215546Sopenharmony_ci nir_ssa_def *values[4]; 4307bf215546Sopenharmony_ci 4308bf215546Sopenharmony_ci /* Extract the export values. */ 4309bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 4310bf215546Sopenharmony_ci if (write_mask & (1 << i)) { 4311bf215546Sopenharmony_ci values[i] = nir_channel(&b, intrin->src[0].ssa, i); 4312bf215546Sopenharmony_ci } else { 4313bf215546Sopenharmony_ci values[i] = nir_ssa_undef(&b, 1, 32); 4314bf215546Sopenharmony_ci } 4315bf215546Sopenharmony_ci } 4316bf215546Sopenharmony_ci 4317bf215546Sopenharmony_ci /* Replace NaN by zero (for 32-bit float formats) to fix game bugs if requested. 
*/ 4318bf215546Sopenharmony_ci if (enable_mrt_output_nan_fixup && !nir->info.internal && !is_16bit) { 4319bf215546Sopenharmony_ci u_foreach_bit(i, write_mask) { 4320bf215546Sopenharmony_ci const bool save_exact = b.exact; 4321bf215546Sopenharmony_ci 4322bf215546Sopenharmony_ci b.exact = true; 4323bf215546Sopenharmony_ci nir_ssa_def *isnan = nir_fneu(&b, values[i], values[i]); 4324bf215546Sopenharmony_ci b.exact = save_exact; 4325bf215546Sopenharmony_ci 4326bf215546Sopenharmony_ci values[i] = nir_bcsel(&b, isnan, nir_imm_zero(&b, 1, 32), values[i]); 4327bf215546Sopenharmony_ci } 4328bf215546Sopenharmony_ci } 4329bf215546Sopenharmony_ci 4330bf215546Sopenharmony_ci if (col_format == V_028714_SPI_SHADER_FP16_ABGR || 4331bf215546Sopenharmony_ci col_format == V_028714_SPI_SHADER_UNORM16_ABGR || 4332bf215546Sopenharmony_ci col_format == V_028714_SPI_SHADER_SNORM16_ABGR || 4333bf215546Sopenharmony_ci col_format == V_028714_SPI_SHADER_UINT16_ABGR || 4334bf215546Sopenharmony_ci col_format == V_028714_SPI_SHADER_SINT16_ABGR) { 4335bf215546Sopenharmony_ci /* Convert and/or clamp the export values. */ 4336bf215546Sopenharmony_ci switch (col_format) { 4337bf215546Sopenharmony_ci case V_028714_SPI_SHADER_UINT16_ABGR: { 4338bf215546Sopenharmony_ci unsigned max_rgb = is_int8 ? 255 : is_int10 ? 1023 : 0; 4339bf215546Sopenharmony_ci u_foreach_bit(i, write_mask) { 4340bf215546Sopenharmony_ci if (is_int8 || is_int10) { 4341bf215546Sopenharmony_ci values[i] = nir_umin(&b, values[i], i == 3 && is_int10 ? nir_imm_int(&b, 3u) 4342bf215546Sopenharmony_ci : nir_imm_int(&b, max_rgb)); 4343bf215546Sopenharmony_ci } else if (is_16bit) { 4344bf215546Sopenharmony_ci values[i] = nir_u2u32(&b, values[i]); 4345bf215546Sopenharmony_ci } 4346bf215546Sopenharmony_ci } 4347bf215546Sopenharmony_ci break; 4348bf215546Sopenharmony_ci } 4349bf215546Sopenharmony_ci case V_028714_SPI_SHADER_SINT16_ABGR: { 4350bf215546Sopenharmony_ci unsigned max_rgb = is_int8 ? 127 : is_int10 ? 
511 : 0; 4351bf215546Sopenharmony_ci unsigned min_rgb = is_int8 ? -128 : is_int10 ? -512 : 0; 4352bf215546Sopenharmony_ci u_foreach_bit(i, write_mask) { 4353bf215546Sopenharmony_ci if (is_int8 || is_int10) { 4354bf215546Sopenharmony_ci values[i] = nir_imin(&b, values[i], i == 3 && is_int10 ? nir_imm_int(&b, 1u) 4355bf215546Sopenharmony_ci : nir_imm_int(&b, max_rgb)); 4356bf215546Sopenharmony_ci values[i] = nir_imax(&b, values[i], i == 3 && is_int10 ? nir_imm_int(&b, -2u) 4357bf215546Sopenharmony_ci : nir_imm_int(&b, min_rgb)); 4358bf215546Sopenharmony_ci } else if (is_16bit) { 4359bf215546Sopenharmony_ci values[i] = nir_i2i32(&b, values[i]); 4360bf215546Sopenharmony_ci } 4361bf215546Sopenharmony_ci } 4362bf215546Sopenharmony_ci break; 4363bf215546Sopenharmony_ci } 4364bf215546Sopenharmony_ci case V_028714_SPI_SHADER_UNORM16_ABGR: 4365bf215546Sopenharmony_ci case V_028714_SPI_SHADER_SNORM16_ABGR: 4366bf215546Sopenharmony_ci u_foreach_bit(i, write_mask) { 4367bf215546Sopenharmony_ci if (is_16bit) { 4368bf215546Sopenharmony_ci values[i] = nir_f2f32(&b, values[i]); 4369bf215546Sopenharmony_ci } 4370bf215546Sopenharmony_ci } 4371bf215546Sopenharmony_ci break; 4372bf215546Sopenharmony_ci default: 4373bf215546Sopenharmony_ci break; 4374bf215546Sopenharmony_ci } 4375bf215546Sopenharmony_ci 4376bf215546Sopenharmony_ci /* Only nir_pack_32_2x16_split needs 16-bit inputs. */ 4377bf215546Sopenharmony_ci bool input_16_bit = col_format == V_028714_SPI_SHADER_FP16_ABGR && is_16bit; 4378bf215546Sopenharmony_ci unsigned new_write_mask = 0; 4379bf215546Sopenharmony_ci 4380bf215546Sopenharmony_ci /* Pack the export values. 
*/ 4381bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 4382bf215546Sopenharmony_ci bool enabled = (write_mask >> (i * 2)) & 0x3; 4383bf215546Sopenharmony_ci 4384bf215546Sopenharmony_ci if (!enabled) { 4385bf215546Sopenharmony_ci values[i] = nir_ssa_undef(&b, 1, 32); 4386bf215546Sopenharmony_ci continue; 4387bf215546Sopenharmony_ci } 4388bf215546Sopenharmony_ci 4389bf215546Sopenharmony_ci nir_ssa_def *src0 = values[i * 2]; 4390bf215546Sopenharmony_ci nir_ssa_def *src1 = values[i * 2 + 1]; 4391bf215546Sopenharmony_ci 4392bf215546Sopenharmony_ci if (!(write_mask & (1 << (i * 2)))) 4393bf215546Sopenharmony_ci src0 = nir_imm_zero(&b, 1, input_16_bit ? 16 : 32); 4394bf215546Sopenharmony_ci if (!(write_mask & (1 << (i * 2 + 1)))) 4395bf215546Sopenharmony_ci src1 = nir_imm_zero(&b, 1, input_16_bit ? 16 : 32); 4396bf215546Sopenharmony_ci 4397bf215546Sopenharmony_ci if (col_format == V_028714_SPI_SHADER_FP16_ABGR) { 4398bf215546Sopenharmony_ci if (is_16bit) { 4399bf215546Sopenharmony_ci values[i] = nir_pack_32_2x16_split(&b, src0, src1); 4400bf215546Sopenharmony_ci } else { 4401bf215546Sopenharmony_ci values[i] = nir_pack_half_2x16_split(&b, src0, src1); 4402bf215546Sopenharmony_ci } 4403bf215546Sopenharmony_ci } else if (col_format == V_028714_SPI_SHADER_UNORM16_ABGR) { 4404bf215546Sopenharmony_ci values[i] = nir_pack_unorm_2x16(&b, nir_vec2(&b, src0, src1)); 4405bf215546Sopenharmony_ci } else if (col_format == V_028714_SPI_SHADER_SNORM16_ABGR) { 4406bf215546Sopenharmony_ci values[i] = nir_pack_snorm_2x16(&b, nir_vec2(&b, src0, src1)); 4407bf215546Sopenharmony_ci } else if (col_format == V_028714_SPI_SHADER_UINT16_ABGR) { 4408bf215546Sopenharmony_ci values[i] = nir_pack_uint_2x16(&b, nir_vec2(&b, src0, src1)); 4409bf215546Sopenharmony_ci } else if (col_format == V_028714_SPI_SHADER_SINT16_ABGR) { 4410bf215546Sopenharmony_ci values[i] = nir_pack_sint_2x16(&b, nir_vec2(&b, src0, src1)); 4411bf215546Sopenharmony_ci } 4412bf215546Sopenharmony_ci 
4413bf215546Sopenharmony_ci new_write_mask |= 1 << i; 4414bf215546Sopenharmony_ci } 4415bf215546Sopenharmony_ci 4416bf215546Sopenharmony_ci /* Update the write mask for compressed outputs. */ 4417bf215546Sopenharmony_ci nir_intrinsic_set_write_mask(intrin, new_write_mask); 4418bf215546Sopenharmony_ci intrin->num_components = util_last_bit(new_write_mask); 4419bf215546Sopenharmony_ci } 4420bf215546Sopenharmony_ci 4421bf215546Sopenharmony_ci nir_ssa_def *new_src = nir_vec(&b, values, intrin->num_components); 4422bf215546Sopenharmony_ci 4423bf215546Sopenharmony_ci nir_instr_rewrite_src(&intrin->instr, &intrin->src[0], nir_src_for_ssa(new_src)); 4424bf215546Sopenharmony_ci 4425bf215546Sopenharmony_ci progress = true; 4426bf215546Sopenharmony_ci } 4427bf215546Sopenharmony_ci } 4428bf215546Sopenharmony_ci 4429bf215546Sopenharmony_ci if (progress) 4430bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_block_index | nir_metadata_dominance); 4431bf215546Sopenharmony_ci else 4432bf215546Sopenharmony_ci nir_metadata_preserve(impl, nir_metadata_all); 4433bf215546Sopenharmony_ci 4434bf215546Sopenharmony_ci return progress; 4435bf215546Sopenharmony_ci} 4436bf215546Sopenharmony_ci 4437bf215546Sopenharmony_civoid 4438bf215546Sopenharmony_ciradv_pipeline_stage_init(const VkPipelineShaderStageCreateInfo *sinfo, 4439bf215546Sopenharmony_ci struct radv_pipeline_stage *out_stage, gl_shader_stage stage) 4440bf215546Sopenharmony_ci{ 4441bf215546Sopenharmony_ci const VkShaderModuleCreateInfo *minfo = 4442bf215546Sopenharmony_ci vk_find_struct_const(sinfo->pNext, SHADER_MODULE_CREATE_INFO); 4443bf215546Sopenharmony_ci const VkPipelineShaderStageModuleIdentifierCreateInfoEXT *iinfo = 4444bf215546Sopenharmony_ci vk_find_struct_const(sinfo->pNext, PIPELINE_SHADER_STAGE_MODULE_IDENTIFIER_CREATE_INFO_EXT); 4445bf215546Sopenharmony_ci 4446bf215546Sopenharmony_ci if (sinfo->module == VK_NULL_HANDLE && !minfo && !iinfo) 4447bf215546Sopenharmony_ci return; 4448bf215546Sopenharmony_ci 
4449bf215546Sopenharmony_ci memset(out_stage, 0, sizeof(*out_stage)); 4450bf215546Sopenharmony_ci 4451bf215546Sopenharmony_ci out_stage->stage = stage; 4452bf215546Sopenharmony_ci out_stage->entrypoint = sinfo->pName; 4453bf215546Sopenharmony_ci out_stage->spec_info = sinfo->pSpecializationInfo; 4454bf215546Sopenharmony_ci out_stage->feedback.flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 4455bf215546Sopenharmony_ci 4456bf215546Sopenharmony_ci if (sinfo->module != VK_NULL_HANDLE) { 4457bf215546Sopenharmony_ci struct vk_shader_module *module = vk_shader_module_from_handle(sinfo->module); 4458bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(out_stage->spirv.sha1) == sizeof(module->sha1)); 4459bf215546Sopenharmony_ci 4460bf215546Sopenharmony_ci out_stage->spirv.data = module->data; 4461bf215546Sopenharmony_ci out_stage->spirv.size = module->size; 4462bf215546Sopenharmony_ci out_stage->spirv.object = &module->base; 4463bf215546Sopenharmony_ci 4464bf215546Sopenharmony_ci if (module->nir) 4465bf215546Sopenharmony_ci out_stage->internal_nir = module->nir; 4466bf215546Sopenharmony_ci } else if (minfo) { 4467bf215546Sopenharmony_ci out_stage->spirv.data = (const char *) minfo->pCode; 4468bf215546Sopenharmony_ci out_stage->spirv.size = minfo->codeSize; 4469bf215546Sopenharmony_ci } 4470bf215546Sopenharmony_ci 4471bf215546Sopenharmony_ci vk_pipeline_hash_shader_stage(sinfo, out_stage->shader_sha1); 4472bf215546Sopenharmony_ci} 4473bf215546Sopenharmony_ci 4474bf215546Sopenharmony_cistatic struct radv_shader * 4475bf215546Sopenharmony_ciradv_pipeline_create_gs_copy_shader(struct radv_pipeline *pipeline, 4476bf215546Sopenharmony_ci struct radv_pipeline_stage *stages, 4477bf215546Sopenharmony_ci const struct radv_pipeline_key *pipeline_key, 4478bf215546Sopenharmony_ci const struct radv_pipeline_layout *pipeline_layout, 4479bf215546Sopenharmony_ci bool keep_executable_info, bool keep_statistic_info, 4480bf215546Sopenharmony_ci struct radv_shader_binary **gs_copy_binary) 
4481bf215546Sopenharmony_ci{ 4482bf215546Sopenharmony_ci struct radv_device *device = pipeline->device; 4483bf215546Sopenharmony_ci struct radv_shader_info info = {0}; 4484bf215546Sopenharmony_ci 4485bf215546Sopenharmony_ci if (stages[MESA_SHADER_GEOMETRY].info.vs.outinfo.export_clip_dists) 4486bf215546Sopenharmony_ci info.vs.outinfo.export_clip_dists = true; 4487bf215546Sopenharmony_ci 4488bf215546Sopenharmony_ci radv_nir_shader_info_pass(device, stages[MESA_SHADER_GEOMETRY].nir, pipeline_layout, pipeline_key, 4489bf215546Sopenharmony_ci &info); 4490bf215546Sopenharmony_ci info.wave_size = 64; /* Wave32 not supported. */ 4491bf215546Sopenharmony_ci info.workgroup_size = 64; /* HW VS: separate waves, no workgroups */ 4492bf215546Sopenharmony_ci info.ballot_bit_size = 64; 4493bf215546Sopenharmony_ci 4494bf215546Sopenharmony_ci struct radv_shader_args gs_copy_args = {0}; 4495bf215546Sopenharmony_ci gs_copy_args.is_gs_copy_shader = true; 4496bf215546Sopenharmony_ci gs_copy_args.explicit_scratch_args = !radv_use_llvm_for_stage(device, MESA_SHADER_VERTEX); 4497bf215546Sopenharmony_ci radv_declare_shader_args(device->physical_device->rad_info.gfx_level, pipeline_key, &info, 4498bf215546Sopenharmony_ci MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX, &gs_copy_args); 4499bf215546Sopenharmony_ci info.user_sgprs_locs = gs_copy_args.user_sgprs_locs; 4500bf215546Sopenharmony_ci info.inline_push_constant_mask = gs_copy_args.ac.inline_push_const_mask; 4501bf215546Sopenharmony_ci 4502bf215546Sopenharmony_ci return radv_create_gs_copy_shader(device, stages[MESA_SHADER_GEOMETRY].nir, &info, &gs_copy_args, 4503bf215546Sopenharmony_ci gs_copy_binary, keep_executable_info, keep_statistic_info, 4504bf215546Sopenharmony_ci pipeline_key->optimisations_disabled); 4505bf215546Sopenharmony_ci} 4506bf215546Sopenharmony_ci 4507bf215546Sopenharmony_cistatic void 4508bf215546Sopenharmony_ciradv_pipeline_nir_to_asm(struct radv_pipeline *pipeline, struct radv_pipeline_stage *stages, 
4509bf215546Sopenharmony_ci const struct radv_pipeline_key *pipeline_key, 4510bf215546Sopenharmony_ci const struct radv_pipeline_layout *pipeline_layout, 4511bf215546Sopenharmony_ci bool keep_executable_info, bool keep_statistic_info, 4512bf215546Sopenharmony_ci gl_shader_stage last_vgt_api_stage, 4513bf215546Sopenharmony_ci struct radv_shader_binary **binaries, 4514bf215546Sopenharmony_ci struct radv_shader_binary **gs_copy_binary) 4515bf215546Sopenharmony_ci{ 4516bf215546Sopenharmony_ci struct radv_device *device = pipeline->device; 4517bf215546Sopenharmony_ci unsigned active_stages = 0; 4518bf215546Sopenharmony_ci 4519bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { 4520bf215546Sopenharmony_ci if (stages[i].nir) 4521bf215546Sopenharmony_ci active_stages |= (1 << i); 4522bf215546Sopenharmony_ci } 4523bf215546Sopenharmony_ci 4524bf215546Sopenharmony_ci bool pipeline_has_ngg = last_vgt_api_stage != MESA_SHADER_NONE && 4525bf215546Sopenharmony_ci stages[last_vgt_api_stage].info.is_ngg; 4526bf215546Sopenharmony_ci 4527bf215546Sopenharmony_ci if (stages[MESA_SHADER_GEOMETRY].nir && !pipeline_has_ngg) { 4528bf215546Sopenharmony_ci pipeline->gs_copy_shader = 4529bf215546Sopenharmony_ci radv_pipeline_create_gs_copy_shader(pipeline, stages, pipeline_key, pipeline_layout, 4530bf215546Sopenharmony_ci keep_executable_info, keep_statistic_info, 4531bf215546Sopenharmony_ci gs_copy_binary); 4532bf215546Sopenharmony_ci } 4533bf215546Sopenharmony_ci 4534bf215546Sopenharmony_ci for (int s = MESA_VULKAN_SHADER_STAGES - 1; s >= 0; s--) { 4535bf215546Sopenharmony_ci if (!(active_stages & (1 << s)) || pipeline->shaders[s]) 4536bf215546Sopenharmony_ci continue; 4537bf215546Sopenharmony_ci 4538bf215546Sopenharmony_ci nir_shader *shaders[2] = { stages[s].nir, NULL }; 4539bf215546Sopenharmony_ci unsigned shader_count = 1; 4540bf215546Sopenharmony_ci 4541bf215546Sopenharmony_ci /* On GFX9+, TES is merged with GS and VS is merged with TCS or GS. 
*/ 4542bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX9 && 4543bf215546Sopenharmony_ci (s == MESA_SHADER_TESS_CTRL || s == MESA_SHADER_GEOMETRY)) { 4544bf215546Sopenharmony_ci gl_shader_stage pre_stage; 4545bf215546Sopenharmony_ci 4546bf215546Sopenharmony_ci if (s == MESA_SHADER_GEOMETRY && stages[MESA_SHADER_TESS_EVAL].nir) { 4547bf215546Sopenharmony_ci pre_stage = MESA_SHADER_TESS_EVAL; 4548bf215546Sopenharmony_ci } else { 4549bf215546Sopenharmony_ci pre_stage = MESA_SHADER_VERTEX; 4550bf215546Sopenharmony_ci } 4551bf215546Sopenharmony_ci 4552bf215546Sopenharmony_ci shaders[0] = stages[pre_stage].nir; 4553bf215546Sopenharmony_ci shaders[1] = stages[s].nir; 4554bf215546Sopenharmony_ci shader_count = 2; 4555bf215546Sopenharmony_ci } 4556bf215546Sopenharmony_ci 4557bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 4558bf215546Sopenharmony_ci 4559bf215546Sopenharmony_ci pipeline->shaders[s] = radv_shader_nir_to_asm(device, &stages[s], shaders, shader_count, 4560bf215546Sopenharmony_ci pipeline_key, keep_executable_info, 4561bf215546Sopenharmony_ci keep_statistic_info, &binaries[s]); 4562bf215546Sopenharmony_ci 4563bf215546Sopenharmony_ci stages[s].feedback.duration += os_time_get_nano() - stage_start; 4564bf215546Sopenharmony_ci 4565bf215546Sopenharmony_ci active_stages &= ~(1 << shaders[0]->info.stage); 4566bf215546Sopenharmony_ci if (shaders[1]) 4567bf215546Sopenharmony_ci active_stages &= ~(1 << shaders[1]->info.stage); 4568bf215546Sopenharmony_ci } 4569bf215546Sopenharmony_ci} 4570bf215546Sopenharmony_ci 4571bf215546Sopenharmony_ciVkResult 4572bf215546Sopenharmony_ciradv_create_shaders(struct radv_pipeline *pipeline, struct radv_pipeline_layout *pipeline_layout, 4573bf215546Sopenharmony_ci struct radv_device *device, struct radv_pipeline_cache *cache, 4574bf215546Sopenharmony_ci const struct radv_pipeline_key *pipeline_key, 4575bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *pStages, 
4576bf215546Sopenharmony_ci uint32_t stageCount, 4577bf215546Sopenharmony_ci const VkPipelineCreateFlags flags, const uint8_t *custom_hash, 4578bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *creation_feedback, 4579bf215546Sopenharmony_ci struct radv_pipeline_shader_stack_size **stack_sizes, 4580bf215546Sopenharmony_ci uint32_t *num_stack_sizes, 4581bf215546Sopenharmony_ci gl_shader_stage *last_vgt_api_stage) 4582bf215546Sopenharmony_ci{ 4583bf215546Sopenharmony_ci const char *noop_fs_entrypoint = "noop_fs"; 4584bf215546Sopenharmony_ci struct radv_shader_binary *binaries[MESA_VULKAN_SHADER_STAGES] = {NULL}; 4585bf215546Sopenharmony_ci struct radv_shader_binary *gs_copy_binary = NULL; 4586bf215546Sopenharmony_ci unsigned char hash[20]; 4587bf215546Sopenharmony_ci bool keep_executable_info = 4588bf215546Sopenharmony_ci (flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR) || 4589bf215546Sopenharmony_ci device->keep_shader_info; 4590bf215546Sopenharmony_ci bool keep_statistic_info = (flags & VK_PIPELINE_CREATE_CAPTURE_STATISTICS_BIT_KHR) || 4591bf215546Sopenharmony_ci (device->instance->debug_flags & RADV_DEBUG_DUMP_SHADER_STATS) || 4592bf215546Sopenharmony_ci device->keep_shader_info; 4593bf215546Sopenharmony_ci struct radv_pipeline_stage stages[MESA_VULKAN_SHADER_STAGES] = {0}; 4594bf215546Sopenharmony_ci VkPipelineCreationFeedback pipeline_feedback = { 4595bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 4596bf215546Sopenharmony_ci }; 4597bf215546Sopenharmony_ci bool noop_fs = false; 4598bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 4599bf215546Sopenharmony_ci 4600bf215546Sopenharmony_ci int64_t pipeline_start = os_time_get_nano(); 4601bf215546Sopenharmony_ci 4602bf215546Sopenharmony_ci for (uint32_t i = 0; i < stageCount; i++) { 4603bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *sinfo = &pStages[i]; 4604bf215546Sopenharmony_ci gl_shader_stage stage = 
vk_to_mesa_shader_stage(sinfo->stage); 4605bf215546Sopenharmony_ci 4606bf215546Sopenharmony_ci radv_pipeline_stage_init(sinfo, &stages[stage], stage); 4607bf215546Sopenharmony_ci } 4608bf215546Sopenharmony_ci 4609bf215546Sopenharmony_ci for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { 4610bf215546Sopenharmony_ci if (!stages[s].entrypoint) 4611bf215546Sopenharmony_ci continue; 4612bf215546Sopenharmony_ci 4613bf215546Sopenharmony_ci if (stages[s].stage < MESA_SHADER_FRAGMENT || stages[s].stage == MESA_SHADER_MESH) 4614bf215546Sopenharmony_ci *last_vgt_api_stage = stages[s].stage; 4615bf215546Sopenharmony_ci } 4616bf215546Sopenharmony_ci 4617bf215546Sopenharmony_ci ASSERTED bool primitive_shading = 4618bf215546Sopenharmony_ci stages[MESA_SHADER_VERTEX].entrypoint || stages[MESA_SHADER_TESS_CTRL].entrypoint || 4619bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].entrypoint || stages[MESA_SHADER_GEOMETRY].entrypoint; 4620bf215546Sopenharmony_ci ASSERTED bool mesh_shading = 4621bf215546Sopenharmony_ci stages[MESA_SHADER_MESH].entrypoint; 4622bf215546Sopenharmony_ci 4623bf215546Sopenharmony_ci /* Primitive and mesh shading must not be mixed in the same pipeline. */ 4624bf215546Sopenharmony_ci assert(!primitive_shading || !mesh_shading); 4625bf215546Sopenharmony_ci /* Mesh shaders are mandatory in mesh shading pipelines. */ 4626bf215546Sopenharmony_ci assert(mesh_shading == !!stages[MESA_SHADER_MESH].entrypoint); 4627bf215546Sopenharmony_ci /* Mesh shaders always need NGG. 
*/ 4628bf215546Sopenharmony_ci assert(!mesh_shading || pipeline_key->use_ngg); 4629bf215546Sopenharmony_ci 4630bf215546Sopenharmony_ci if (custom_hash) 4631bf215546Sopenharmony_ci memcpy(hash, custom_hash, 20); 4632bf215546Sopenharmony_ci else { 4633bf215546Sopenharmony_ci radv_hash_shaders(hash, stages, pipeline_layout, pipeline_key, 4634bf215546Sopenharmony_ci radv_get_hash_flags(device, keep_statistic_info)); 4635bf215546Sopenharmony_ci } 4636bf215546Sopenharmony_ci 4637bf215546Sopenharmony_ci pipeline->pipeline_hash = *(uint64_t *)hash; 4638bf215546Sopenharmony_ci 4639bf215546Sopenharmony_ci bool found_in_application_cache = true; 4640bf215546Sopenharmony_ci if (!keep_executable_info && 4641bf215546Sopenharmony_ci radv_create_shaders_from_pipeline_cache(device, cache, hash, pipeline, 4642bf215546Sopenharmony_ci stack_sizes, num_stack_sizes, 4643bf215546Sopenharmony_ci &found_in_application_cache)) { 4644bf215546Sopenharmony_ci if (found_in_application_cache) 4645bf215546Sopenharmony_ci pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 4646bf215546Sopenharmony_ci result = VK_SUCCESS; 4647bf215546Sopenharmony_ci goto done; 4648bf215546Sopenharmony_ci } 4649bf215546Sopenharmony_ci 4650bf215546Sopenharmony_ci if (flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) { 4651bf215546Sopenharmony_ci if (found_in_application_cache) 4652bf215546Sopenharmony_ci pipeline_feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 4653bf215546Sopenharmony_ci result = VK_PIPELINE_COMPILE_REQUIRED; 4654bf215546Sopenharmony_ci goto done; 4655bf215546Sopenharmony_ci } 4656bf215546Sopenharmony_ci 4657bf215546Sopenharmony_ci if (pipeline->type == RADV_PIPELINE_GRAPHICS && !stages[MESA_SHADER_FRAGMENT].entrypoint) { 4658bf215546Sopenharmony_ci nir_builder fs_b = radv_meta_init_shader(device, MESA_SHADER_FRAGMENT, "noop_fs"); 4659bf215546Sopenharmony_ci 4660bf215546Sopenharmony_ci 
stages[MESA_SHADER_FRAGMENT] = (struct radv_pipeline_stage) { 4661bf215546Sopenharmony_ci .stage = MESA_SHADER_FRAGMENT, 4662bf215546Sopenharmony_ci .internal_nir = fs_b.shader, 4663bf215546Sopenharmony_ci .entrypoint = noop_fs_entrypoint, 4664bf215546Sopenharmony_ci .feedback = { 4665bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 4666bf215546Sopenharmony_ci }, 4667bf215546Sopenharmony_ci }; 4668bf215546Sopenharmony_ci 4669bf215546Sopenharmony_ci noop_fs = true; 4670bf215546Sopenharmony_ci } 4671bf215546Sopenharmony_ci 4672bf215546Sopenharmony_ci for (unsigned s = 0; s < MESA_VULKAN_SHADER_STAGES; s++) { 4673bf215546Sopenharmony_ci if (!stages[s].entrypoint) 4674bf215546Sopenharmony_ci continue; 4675bf215546Sopenharmony_ci 4676bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 4677bf215546Sopenharmony_ci 4678bf215546Sopenharmony_ci stages[s].nir = radv_shader_spirv_to_nir(device, &stages[s], pipeline_key); 4679bf215546Sopenharmony_ci 4680bf215546Sopenharmony_ci stages[s].feedback.duration += os_time_get_nano() - stage_start; 4681bf215546Sopenharmony_ci } 4682bf215546Sopenharmony_ci 4683bf215546Sopenharmony_ci /* Force per-vertex VRS. 
*/ 4684bf215546Sopenharmony_ci if (radv_consider_force_vrs(pipeline, noop_fs, stages, *last_vgt_api_stage)) { 4685bf215546Sopenharmony_ci assert(*last_vgt_api_stage == MESA_SHADER_VERTEX || 4686bf215546Sopenharmony_ci *last_vgt_api_stage == MESA_SHADER_TESS_EVAL || 4687bf215546Sopenharmony_ci *last_vgt_api_stage == MESA_SHADER_GEOMETRY); 4688bf215546Sopenharmony_ci nir_shader *last_vgt_shader = stages[*last_vgt_api_stage].nir; 4689bf215546Sopenharmony_ci NIR_PASS(_, last_vgt_shader, radv_force_primitive_shading_rate, device); 4690bf215546Sopenharmony_ci } 4691bf215546Sopenharmony_ci 4692bf215546Sopenharmony_ci bool optimize_conservatively = pipeline_key->optimisations_disabled; 4693bf215546Sopenharmony_ci 4694bf215546Sopenharmony_ci /* Determine if shaders uses NGG before linking because it's needed for some NIR pass. */ 4695bf215546Sopenharmony_ci radv_fill_shader_info_ngg(pipeline, pipeline_key, stages); 4696bf215546Sopenharmony_ci 4697bf215546Sopenharmony_ci bool pipeline_has_ngg = (stages[MESA_SHADER_VERTEX].nir && stages[MESA_SHADER_VERTEX].info.is_ngg) || 4698bf215546Sopenharmony_ci (stages[MESA_SHADER_TESS_EVAL].nir && stages[MESA_SHADER_TESS_EVAL].info.is_ngg) || 4699bf215546Sopenharmony_ci (stages[MESA_SHADER_MESH].nir && stages[MESA_SHADER_MESH].info.is_ngg); 4700bf215546Sopenharmony_ci 4701bf215546Sopenharmony_ci if (stages[MESA_SHADER_GEOMETRY].nir) { 4702bf215546Sopenharmony_ci unsigned nir_gs_flags = nir_lower_gs_intrinsics_per_stream; 4703bf215546Sopenharmony_ci 4704bf215546Sopenharmony_ci if (pipeline_has_ngg) { 4705bf215546Sopenharmony_ci nir_gs_flags |= nir_lower_gs_intrinsics_count_primitives | 4706bf215546Sopenharmony_ci nir_lower_gs_intrinsics_count_vertices_per_primitive | 4707bf215546Sopenharmony_ci nir_lower_gs_intrinsics_overwrite_incomplete; 4708bf215546Sopenharmony_ci } 4709bf215546Sopenharmony_ci 4710bf215546Sopenharmony_ci NIR_PASS(_, stages[MESA_SHADER_GEOMETRY].nir, nir_lower_gs_intrinsics, nir_gs_flags); 4711bf215546Sopenharmony_ci } 
4712bf215546Sopenharmony_ci 4713bf215546Sopenharmony_ci radv_link_shaders(pipeline, pipeline_key, stages, optimize_conservatively, *last_vgt_api_stage); 4714bf215546Sopenharmony_ci radv_set_driver_locations(pipeline, stages, *last_vgt_api_stage); 4715bf215546Sopenharmony_ci 4716bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 4717bf215546Sopenharmony_ci if (stages[i].nir) { 4718bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 4719bf215546Sopenharmony_ci 4720bf215546Sopenharmony_ci radv_optimize_nir(stages[i].nir, optimize_conservatively, false); 4721bf215546Sopenharmony_ci 4722bf215546Sopenharmony_ci /* Gather info again, information such as outputs_read can be out-of-date. */ 4723bf215546Sopenharmony_ci nir_shader_gather_info(stages[i].nir, nir_shader_get_entrypoint(stages[i].nir)); 4724bf215546Sopenharmony_ci radv_lower_io(device, stages[i].nir, stages[MESA_SHADER_MESH].nir); 4725bf215546Sopenharmony_ci 4726bf215546Sopenharmony_ci stages[i].feedback.duration += os_time_get_nano() - stage_start; 4727bf215546Sopenharmony_ci } 4728bf215546Sopenharmony_ci } 4729bf215546Sopenharmony_ci 4730bf215546Sopenharmony_ci if (stages[MESA_SHADER_TESS_CTRL].nir) { 4731bf215546Sopenharmony_ci nir_lower_patch_vertices(stages[MESA_SHADER_TESS_EVAL].nir, 4732bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_CTRL].nir->info.tess.tcs_vertices_out, NULL); 4733bf215546Sopenharmony_ci gather_tess_info(device, stages, pipeline_key); 4734bf215546Sopenharmony_ci } 4735bf215546Sopenharmony_ci 4736bf215546Sopenharmony_ci if (stages[MESA_SHADER_VERTEX].nir) { 4737bf215546Sopenharmony_ci NIR_PASS(_, stages[MESA_SHADER_VERTEX].nir, radv_lower_vs_input, pipeline_key); 4738bf215546Sopenharmony_ci } 4739bf215546Sopenharmony_ci 4740bf215546Sopenharmony_ci if (stages[MESA_SHADER_FRAGMENT].nir && !radv_use_llvm_for_stage(device, MESA_SHADER_FRAGMENT)) { 4741bf215546Sopenharmony_ci /* TODO: Convert the LLVM backend. 
*/ 4742bf215546Sopenharmony_ci NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_lower_fs_output, pipeline_key); 4743bf215546Sopenharmony_ci } 4744bf215546Sopenharmony_ci 4745bf215546Sopenharmony_ci radv_fill_shader_info(pipeline, pipeline_layout, pipeline_key, stages, *last_vgt_api_stage); 4746bf215546Sopenharmony_ci 4747bf215546Sopenharmony_ci if (pipeline_has_ngg) { 4748bf215546Sopenharmony_ci struct gfx10_ngg_info *ngg_info; 4749bf215546Sopenharmony_ci 4750bf215546Sopenharmony_ci if (stages[MESA_SHADER_GEOMETRY].nir) 4751bf215546Sopenharmony_ci ngg_info = &stages[MESA_SHADER_GEOMETRY].info.ngg_info; 4752bf215546Sopenharmony_ci else if (stages[MESA_SHADER_TESS_CTRL].nir) 4753bf215546Sopenharmony_ci ngg_info = &stages[MESA_SHADER_TESS_EVAL].info.ngg_info; 4754bf215546Sopenharmony_ci else if (stages[MESA_SHADER_VERTEX].nir) 4755bf215546Sopenharmony_ci ngg_info = &stages[MESA_SHADER_VERTEX].info.ngg_info; 4756bf215546Sopenharmony_ci else if (stages[MESA_SHADER_MESH].nir) 4757bf215546Sopenharmony_ci ngg_info = &stages[MESA_SHADER_MESH].info.ngg_info; 4758bf215546Sopenharmony_ci else 4759bf215546Sopenharmony_ci unreachable("Missing NGG shader stage."); 4760bf215546Sopenharmony_ci 4761bf215546Sopenharmony_ci if (*last_vgt_api_stage == MESA_SHADER_MESH) 4762bf215546Sopenharmony_ci gfx10_get_ngg_ms_info(&stages[MESA_SHADER_MESH], ngg_info); 4763bf215546Sopenharmony_ci else 4764bf215546Sopenharmony_ci gfx10_get_ngg_info(pipeline_key, pipeline, stages, ngg_info); 4765bf215546Sopenharmony_ci } else if (stages[MESA_SHADER_GEOMETRY].nir) { 4766bf215546Sopenharmony_ci struct gfx9_gs_info *gs_info = &stages[MESA_SHADER_GEOMETRY].info.gs_ring_info; 4767bf215546Sopenharmony_ci 4768bf215546Sopenharmony_ci gfx9_get_gs_info(pipeline_key, pipeline, stages, gs_info); 4769bf215546Sopenharmony_ci } else { 4770bf215546Sopenharmony_ci gl_shader_stage hw_vs_api_stage = 4771bf215546Sopenharmony_ci stages[MESA_SHADER_TESS_EVAL].nir ? 
MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; 4772bf215546Sopenharmony_ci stages[hw_vs_api_stage].info.workgroup_size = stages[hw_vs_api_stage].info.wave_size; 4773bf215546Sopenharmony_ci } 4774bf215546Sopenharmony_ci 4775bf215546Sopenharmony_ci radv_determine_ngg_settings(pipeline, pipeline_key, stages, *last_vgt_api_stage); 4776bf215546Sopenharmony_ci 4777bf215546Sopenharmony_ci radv_declare_pipeline_args(device, stages, pipeline_key); 4778bf215546Sopenharmony_ci 4779bf215546Sopenharmony_ci if (stages[MESA_SHADER_FRAGMENT].nir) { 4780bf215546Sopenharmony_ci NIR_PASS(_, stages[MESA_SHADER_FRAGMENT].nir, radv_lower_fs_intrinsics, 4781bf215546Sopenharmony_ci &stages[MESA_SHADER_FRAGMENT], pipeline_key); 4782bf215546Sopenharmony_ci } 4783bf215546Sopenharmony_ci 4784bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 4785bf215546Sopenharmony_ci if (stages[i].nir) { 4786bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 4787bf215546Sopenharmony_ci 4788bf215546Sopenharmony_ci /* Wave and workgroup size should already be filled. 
*/ 4789bf215546Sopenharmony_ci assert(stages[i].info.wave_size && stages[i].info.workgroup_size); 4790bf215546Sopenharmony_ci 4791bf215546Sopenharmony_ci if (!radv_use_llvm_for_stage(device, i)) { 4792bf215546Sopenharmony_ci nir_lower_non_uniform_access_options options = { 4793bf215546Sopenharmony_ci .types = nir_lower_non_uniform_ubo_access | nir_lower_non_uniform_ssbo_access | 4794bf215546Sopenharmony_ci nir_lower_non_uniform_texture_access | nir_lower_non_uniform_image_access, 4795bf215546Sopenharmony_ci .callback = &non_uniform_access_callback, 4796bf215546Sopenharmony_ci .callback_data = NULL, 4797bf215546Sopenharmony_ci }; 4798bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_lower_non_uniform_access, &options); 4799bf215546Sopenharmony_ci } 4800bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_lower_memory_model); 4801bf215546Sopenharmony_ci 4802bf215546Sopenharmony_ci nir_load_store_vectorize_options vectorize_opts = { 4803bf215546Sopenharmony_ci .modes = nir_var_mem_ssbo | nir_var_mem_ubo | nir_var_mem_push_const | 4804bf215546Sopenharmony_ci nir_var_mem_shared | nir_var_mem_global, 4805bf215546Sopenharmony_ci .callback = mem_vectorize_callback, 4806bf215546Sopenharmony_ci .robust_modes = 0, 4807bf215546Sopenharmony_ci /* On GFX6, read2/write2 is out-of-bounds if the offset register is negative, even if 4808bf215546Sopenharmony_ci * the final offset is not. 
4809bf215546Sopenharmony_ci */ 4810bf215546Sopenharmony_ci .has_shared2_amd = device->physical_device->rad_info.gfx_level >= GFX7, 4811bf215546Sopenharmony_ci }; 4812bf215546Sopenharmony_ci 4813bf215546Sopenharmony_ci if (device->robust_buffer_access2) { 4814bf215546Sopenharmony_ci vectorize_opts.robust_modes = 4815bf215546Sopenharmony_ci nir_var_mem_ubo | nir_var_mem_ssbo | nir_var_mem_push_const; 4816bf215546Sopenharmony_ci } 4817bf215546Sopenharmony_ci 4818bf215546Sopenharmony_ci bool progress = false; 4819bf215546Sopenharmony_ci NIR_PASS(progress, stages[i].nir, nir_opt_load_store_vectorize, &vectorize_opts); 4820bf215546Sopenharmony_ci if (progress) { 4821bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_copy_prop); 4822bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_shrink_stores, 4823bf215546Sopenharmony_ci !device->instance->disable_shrink_image_store); 4824bf215546Sopenharmony_ci 4825bf215546Sopenharmony_ci /* Gather info again, to update whether 8/16-bit are used. 
*/ 4826bf215546Sopenharmony_ci nir_shader_gather_info(stages[i].nir, nir_shader_get_entrypoint(stages[i].nir)); 4827bf215546Sopenharmony_ci } 4828bf215546Sopenharmony_ci 4829bf215546Sopenharmony_ci struct radv_shader_info *info = &stages[i].info; 4830bf215546Sopenharmony_ci if (pipeline->device->physical_device->rad_info.gfx_level >= GFX9) { 4831bf215546Sopenharmony_ci if (i == MESA_SHADER_VERTEX && stages[MESA_SHADER_TESS_CTRL].nir) 4832bf215546Sopenharmony_ci info = &stages[MESA_SHADER_TESS_CTRL].info; 4833bf215546Sopenharmony_ci else if (i == MESA_SHADER_VERTEX && stages[MESA_SHADER_GEOMETRY].nir) 4834bf215546Sopenharmony_ci info = &stages[MESA_SHADER_GEOMETRY].info; 4835bf215546Sopenharmony_ci else if (i == MESA_SHADER_TESS_EVAL && stages[MESA_SHADER_GEOMETRY].nir) 4836bf215546Sopenharmony_ci info = &stages[MESA_SHADER_GEOMETRY].info; 4837bf215546Sopenharmony_ci } 4838bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, radv_nir_lower_ycbcr_textures, pipeline_layout); 4839bf215546Sopenharmony_ci NIR_PASS_V(stages[i].nir, radv_nir_apply_pipeline_layout, device, pipeline_layout, info, 4840bf215546Sopenharmony_ci &stages[i].args); 4841bf215546Sopenharmony_ci 4842bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_shrink_vectors); 4843bf215546Sopenharmony_ci 4844bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_lower_alu_width, opt_vectorize_callback, device); 4845bf215546Sopenharmony_ci 4846bf215546Sopenharmony_ci /* lower ALU operations */ 4847bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_lower_int64); 4848bf215546Sopenharmony_ci 4849bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_idiv_const, 8); 4850bf215546Sopenharmony_ci 4851bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_lower_idiv, 4852bf215546Sopenharmony_ci &(nir_lower_idiv_options){ 4853bf215546Sopenharmony_ci .imprecise_32bit_lowering = false, 4854bf215546Sopenharmony_ci .allow_fp16 = device->physical_device->rad_info.gfx_level >= GFX9, 4855bf215546Sopenharmony_ci }); 
4856bf215546Sopenharmony_ci 4857bf215546Sopenharmony_ci nir_move_options sink_opts = nir_move_const_undef | nir_move_copies; 4858bf215546Sopenharmony_ci if (i != MESA_SHADER_FRAGMENT || !pipeline_key->disable_sinking_load_input_fs) 4859bf215546Sopenharmony_ci sink_opts |= nir_move_load_input; 4860bf215546Sopenharmony_ci 4861bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_sink, sink_opts); 4862bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_move, 4863bf215546Sopenharmony_ci nir_move_load_input | nir_move_const_undef | nir_move_copies); 4864bf215546Sopenharmony_ci 4865bf215546Sopenharmony_ci /* Lower I/O intrinsics to memory instructions. */ 4866bf215546Sopenharmony_ci bool io_to_mem = radv_lower_io_to_mem(device, &stages[i], pipeline_key); 4867bf215546Sopenharmony_ci bool lowered_ngg = pipeline_has_ngg && i == *last_vgt_api_stage; 4868bf215546Sopenharmony_ci if (lowered_ngg) 4869bf215546Sopenharmony_ci radv_lower_ngg(device, &stages[i], pipeline_key); 4870bf215546Sopenharmony_ci 4871bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, ac_nir_lower_global_access); 4872bf215546Sopenharmony_ci NIR_PASS_V(stages[i].nir, radv_nir_lower_abi, device->physical_device->rad_info.gfx_level, 4873bf215546Sopenharmony_ci &stages[i].info, &stages[i].args, pipeline_key, 4874bf215546Sopenharmony_ci radv_use_llvm_for_stage(device, i)); 4875bf215546Sopenharmony_ci radv_optimize_nir_algebraic( 4876bf215546Sopenharmony_ci stages[i].nir, io_to_mem || lowered_ngg || i == MESA_SHADER_COMPUTE || i == MESA_SHADER_TASK); 4877bf215546Sopenharmony_ci 4878bf215546Sopenharmony_ci if (stages[i].nir->info.bit_sizes_int & (8 | 16)) { 4879bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX8) { 4880bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_convert_to_lcssa, true, true); 4881bf215546Sopenharmony_ci nir_divergence_analysis(stages[i].nir); 4882bf215546Sopenharmony_ci } 4883bf215546Sopenharmony_ci 4884bf215546Sopenharmony_ci if 
(nir_lower_bit_size(stages[i].nir, lower_bit_size_callback, device)) { 4885bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_constant_folding); 4886bf215546Sopenharmony_ci } 4887bf215546Sopenharmony_ci 4888bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX8) 4889bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_remove_phis); /* cleanup LCSSA phis */ 4890bf215546Sopenharmony_ci } 4891bf215546Sopenharmony_ci if (((stages[i].nir->info.bit_sizes_int | stages[i].nir->info.bit_sizes_float) & 16) && 4892bf215546Sopenharmony_ci device->physical_device->rad_info.gfx_level >= GFX9) { 4893bf215546Sopenharmony_ci bool separate_g16 = device->physical_device->rad_info.gfx_level >= GFX10; 4894bf215546Sopenharmony_ci struct nir_fold_tex_srcs_options fold_srcs_options[] = { 4895bf215546Sopenharmony_ci { 4896bf215546Sopenharmony_ci .sampler_dims = 4897bf215546Sopenharmony_ci ~(BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE) | BITFIELD_BIT(GLSL_SAMPLER_DIM_BUF)), 4898bf215546Sopenharmony_ci .src_types = (1 << nir_tex_src_coord) | (1 << nir_tex_src_lod) | 4899bf215546Sopenharmony_ci (1 << nir_tex_src_bias) | (1 << nir_tex_src_min_lod) | 4900bf215546Sopenharmony_ci (1 << nir_tex_src_ms_index) | 4901bf215546Sopenharmony_ci (separate_g16 ? 0 : (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy)), 4902bf215546Sopenharmony_ci }, 4903bf215546Sopenharmony_ci { 4904bf215546Sopenharmony_ci .sampler_dims = ~BITFIELD_BIT(GLSL_SAMPLER_DIM_CUBE), 4905bf215546Sopenharmony_ci .src_types = (1 << nir_tex_src_ddx) | (1 << nir_tex_src_ddy), 4906bf215546Sopenharmony_ci }, 4907bf215546Sopenharmony_ci }; 4908bf215546Sopenharmony_ci struct nir_fold_16bit_tex_image_options fold_16bit_options = { 4909bf215546Sopenharmony_ci .rounding_mode = nir_rounding_mode_rtne, 4910bf215546Sopenharmony_ci .fold_tex_dest = true, 4911bf215546Sopenharmony_ci .fold_image_load_store_data = true, 4912bf215546Sopenharmony_ci .fold_srcs_options_count = separate_g16 ? 
2 : 1, 4913bf215546Sopenharmony_ci .fold_srcs_options = fold_srcs_options, 4914bf215546Sopenharmony_ci }; 4915bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_fold_16bit_tex_image, &fold_16bit_options); 4916bf215546Sopenharmony_ci 4917bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_vectorize, opt_vectorize_callback, device); 4918bf215546Sopenharmony_ci } 4919bf215546Sopenharmony_ci 4920bf215546Sopenharmony_ci /* cleanup passes */ 4921bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_lower_alu_width, opt_vectorize_callback, device); 4922bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_lower_load_const_to_scalar); 4923bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_copy_prop); 4924bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_dce); 4925bf215546Sopenharmony_ci 4926bf215546Sopenharmony_ci sink_opts |= nir_move_comparisons | nir_move_load_ubo | nir_move_load_ssbo; 4927bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_sink, sink_opts); 4928bf215546Sopenharmony_ci 4929bf215546Sopenharmony_ci nir_move_options move_opts = nir_move_const_undef | nir_move_load_ubo | 4930bf215546Sopenharmony_ci nir_move_load_input | nir_move_comparisons | nir_move_copies; 4931bf215546Sopenharmony_ci NIR_PASS(_, stages[i].nir, nir_opt_move, move_opts); 4932bf215546Sopenharmony_ci 4933bf215546Sopenharmony_ci stages[i].feedback.duration += os_time_get_nano() - stage_start; 4934bf215546Sopenharmony_ci } 4935bf215546Sopenharmony_ci } 4936bf215546Sopenharmony_ci 4937bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 4938bf215546Sopenharmony_ci if (stages[i].nir) { 4939bf215546Sopenharmony_ci if (radv_can_dump_shader(device, stages[i].nir, false)) 4940bf215546Sopenharmony_ci nir_print_shader(stages[i].nir, stderr); 4941bf215546Sopenharmony_ci } 4942bf215546Sopenharmony_ci } 4943bf215546Sopenharmony_ci 4944bf215546Sopenharmony_ci /* Compile NIR shaders to AMD assembly. 
*/ 4945bf215546Sopenharmony_ci radv_pipeline_nir_to_asm(pipeline, stages, pipeline_key, pipeline_layout, keep_executable_info, 4946bf215546Sopenharmony_ci keep_statistic_info, *last_vgt_api_stage, binaries, &gs_copy_binary); 4947bf215546Sopenharmony_ci 4948bf215546Sopenharmony_ci if (keep_executable_info) { 4949bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 4950bf215546Sopenharmony_ci struct radv_shader *shader = pipeline->shaders[i]; 4951bf215546Sopenharmony_ci if (!shader) 4952bf215546Sopenharmony_ci continue; 4953bf215546Sopenharmony_ci 4954bf215546Sopenharmony_ci if (!stages[i].spirv.size) 4955bf215546Sopenharmony_ci continue; 4956bf215546Sopenharmony_ci 4957bf215546Sopenharmony_ci shader->spirv = malloc(stages[i].spirv.size); 4958bf215546Sopenharmony_ci memcpy(shader->spirv, stages[i].spirv.data, stages[i].spirv.size); 4959bf215546Sopenharmony_ci shader->spirv_size = stages[i].spirv.size; 4960bf215546Sopenharmony_ci } 4961bf215546Sopenharmony_ci } 4962bf215546Sopenharmony_ci 4963bf215546Sopenharmony_ci /* Upload shader binaries. */ 4964bf215546Sopenharmony_ci radv_upload_shaders(device, pipeline, binaries, gs_copy_binary); 4965bf215546Sopenharmony_ci 4966bf215546Sopenharmony_ci if (!keep_executable_info) { 4967bf215546Sopenharmony_ci if (pipeline->gs_copy_shader) { 4968bf215546Sopenharmony_ci assert(!binaries[MESA_SHADER_COMPUTE] && !pipeline->shaders[MESA_SHADER_COMPUTE]); 4969bf215546Sopenharmony_ci binaries[MESA_SHADER_COMPUTE] = gs_copy_binary; 4970bf215546Sopenharmony_ci pipeline->shaders[MESA_SHADER_COMPUTE] = pipeline->gs_copy_shader; 4971bf215546Sopenharmony_ci } 4972bf215546Sopenharmony_ci 4973bf215546Sopenharmony_ci radv_pipeline_cache_insert_shaders(device, cache, hash, pipeline, binaries, 4974bf215546Sopenharmony_ci stack_sizes ? *stack_sizes : NULL, 4975bf215546Sopenharmony_ci num_stack_sizes ? 
*num_stack_sizes : 0); 4976bf215546Sopenharmony_ci 4977bf215546Sopenharmony_ci if (pipeline->gs_copy_shader) { 4978bf215546Sopenharmony_ci pipeline->gs_copy_shader = pipeline->shaders[MESA_SHADER_COMPUTE]; 4979bf215546Sopenharmony_ci pipeline->shaders[MESA_SHADER_COMPUTE] = NULL; 4980bf215546Sopenharmony_ci binaries[MESA_SHADER_COMPUTE] = NULL; 4981bf215546Sopenharmony_ci } 4982bf215546Sopenharmony_ci } 4983bf215546Sopenharmony_ci 4984bf215546Sopenharmony_ci free(gs_copy_binary); 4985bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 4986bf215546Sopenharmony_ci free(binaries[i]); 4987bf215546Sopenharmony_ci if (stages[i].nir) { 4988bf215546Sopenharmony_ci if (radv_can_dump_shader_stats(device, stages[i].nir) && pipeline->shaders[i]) { 4989bf215546Sopenharmony_ci radv_dump_shader_stats(device, pipeline, i, stderr); 4990bf215546Sopenharmony_ci } 4991bf215546Sopenharmony_ci 4992bf215546Sopenharmony_ci ralloc_free(stages[i].nir); 4993bf215546Sopenharmony_ci } 4994bf215546Sopenharmony_ci } 4995bf215546Sopenharmony_ci 4996bf215546Sopenharmony_cidone: 4997bf215546Sopenharmony_ci pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 4998bf215546Sopenharmony_ci 4999bf215546Sopenharmony_ci if (creation_feedback) { 5000bf215546Sopenharmony_ci *creation_feedback->pPipelineCreationFeedback = pipeline_feedback; 5001bf215546Sopenharmony_ci 5002bf215546Sopenharmony_ci uint32_t stage_count = creation_feedback->pipelineStageCreationFeedbackCount; 5003bf215546Sopenharmony_ci assert(stage_count == 0 || stageCount == stage_count); 5004bf215546Sopenharmony_ci for (uint32_t i = 0; i < stage_count; i++) { 5005bf215546Sopenharmony_ci gl_shader_stage s = vk_to_mesa_shader_stage(pStages[i].stage); 5006bf215546Sopenharmony_ci creation_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback; 5007bf215546Sopenharmony_ci } 5008bf215546Sopenharmony_ci } 5009bf215546Sopenharmony_ci 5010bf215546Sopenharmony_ci return result; 
5011bf215546Sopenharmony_ci} 5012bf215546Sopenharmony_ci 5013bf215546Sopenharmony_cistatic uint32_t 5014bf215546Sopenharmony_ciradv_pipeline_stage_to_user_data_0(struct radv_graphics_pipeline *pipeline, gl_shader_stage stage, 5015bf215546Sopenharmony_ci enum amd_gfx_level gfx_level) 5016bf215546Sopenharmony_ci{ 5017bf215546Sopenharmony_ci bool has_gs = radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY); 5018bf215546Sopenharmony_ci bool has_tess = radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL); 5019bf215546Sopenharmony_ci bool has_ngg = radv_pipeline_has_ngg(pipeline); 5020bf215546Sopenharmony_ci 5021bf215546Sopenharmony_ci switch (stage) { 5022bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 5023bf215546Sopenharmony_ci return R_00B030_SPI_SHADER_USER_DATA_PS_0; 5024bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 5025bf215546Sopenharmony_ci if (has_tess) { 5026bf215546Sopenharmony_ci if (gfx_level >= GFX10) { 5027bf215546Sopenharmony_ci return R_00B430_SPI_SHADER_USER_DATA_HS_0; 5028bf215546Sopenharmony_ci } else if (gfx_level == GFX9) { 5029bf215546Sopenharmony_ci return R_00B430_SPI_SHADER_USER_DATA_LS_0; 5030bf215546Sopenharmony_ci } else { 5031bf215546Sopenharmony_ci return R_00B530_SPI_SHADER_USER_DATA_LS_0; 5032bf215546Sopenharmony_ci } 5033bf215546Sopenharmony_ci } 5034bf215546Sopenharmony_ci 5035bf215546Sopenharmony_ci if (has_gs) { 5036bf215546Sopenharmony_ci if (gfx_level >= GFX10) { 5037bf215546Sopenharmony_ci return R_00B230_SPI_SHADER_USER_DATA_GS_0; 5038bf215546Sopenharmony_ci } else { 5039bf215546Sopenharmony_ci return R_00B330_SPI_SHADER_USER_DATA_ES_0; 5040bf215546Sopenharmony_ci } 5041bf215546Sopenharmony_ci } 5042bf215546Sopenharmony_ci 5043bf215546Sopenharmony_ci if (has_ngg) 5044bf215546Sopenharmony_ci return R_00B230_SPI_SHADER_USER_DATA_GS_0; 5045bf215546Sopenharmony_ci 5046bf215546Sopenharmony_ci return R_00B130_SPI_SHADER_USER_DATA_VS_0; 5047bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 5048bf215546Sopenharmony_ci 
return gfx_level == GFX9 ? R_00B330_SPI_SHADER_USER_DATA_ES_0 5049bf215546Sopenharmony_ci : R_00B230_SPI_SHADER_USER_DATA_GS_0; 5050bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 5051bf215546Sopenharmony_ci case MESA_SHADER_TASK: 5052bf215546Sopenharmony_ci return R_00B900_COMPUTE_USER_DATA_0; 5053bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 5054bf215546Sopenharmony_ci return gfx_level == GFX9 ? R_00B430_SPI_SHADER_USER_DATA_LS_0 5055bf215546Sopenharmony_ci : R_00B430_SPI_SHADER_USER_DATA_HS_0; 5056bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 5057bf215546Sopenharmony_ci if (has_gs) { 5058bf215546Sopenharmony_ci return gfx_level >= GFX10 ? R_00B230_SPI_SHADER_USER_DATA_GS_0 5059bf215546Sopenharmony_ci : R_00B330_SPI_SHADER_USER_DATA_ES_0; 5060bf215546Sopenharmony_ci } else if (has_ngg) { 5061bf215546Sopenharmony_ci return R_00B230_SPI_SHADER_USER_DATA_GS_0; 5062bf215546Sopenharmony_ci } else { 5063bf215546Sopenharmony_ci return R_00B130_SPI_SHADER_USER_DATA_VS_0; 5064bf215546Sopenharmony_ci } 5065bf215546Sopenharmony_ci case MESA_SHADER_MESH: 5066bf215546Sopenharmony_ci assert(has_ngg); 5067bf215546Sopenharmony_ci return R_00B230_SPI_SHADER_USER_DATA_GS_0; 5068bf215546Sopenharmony_ci default: 5069bf215546Sopenharmony_ci unreachable("unknown shader"); 5070bf215546Sopenharmony_ci } 5071bf215546Sopenharmony_ci} 5072bf215546Sopenharmony_ci 5073bf215546Sopenharmony_cistruct radv_bin_size_entry { 5074bf215546Sopenharmony_ci unsigned bpp; 5075bf215546Sopenharmony_ci VkExtent2D extent; 5076bf215546Sopenharmony_ci}; 5077bf215546Sopenharmony_ci 5078bf215546Sopenharmony_cistatic VkExtent2D 5079bf215546Sopenharmony_ciradv_gfx9_compute_bin_size(const struct radv_graphics_pipeline *pipeline, 5080bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 5081bf215546Sopenharmony_ci{ 5082bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5083bf215546Sopenharmony_ci static const struct 
radv_bin_size_entry color_size_table[][3][9] = { 5084bf215546Sopenharmony_ci { 5085bf215546Sopenharmony_ci /* One RB / SE */ 5086bf215546Sopenharmony_ci { 5087bf215546Sopenharmony_ci /* One shader engine */ 5088bf215546Sopenharmony_ci {0, {128, 128}}, 5089bf215546Sopenharmony_ci {1, {64, 128}}, 5090bf215546Sopenharmony_ci {2, {32, 128}}, 5091bf215546Sopenharmony_ci {3, {16, 128}}, 5092bf215546Sopenharmony_ci {17, {0, 0}}, 5093bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5094bf215546Sopenharmony_ci }, 5095bf215546Sopenharmony_ci { 5096bf215546Sopenharmony_ci /* Two shader engines */ 5097bf215546Sopenharmony_ci {0, {128, 128}}, 5098bf215546Sopenharmony_ci {2, {64, 128}}, 5099bf215546Sopenharmony_ci {3, {32, 128}}, 5100bf215546Sopenharmony_ci {5, {16, 128}}, 5101bf215546Sopenharmony_ci {17, {0, 0}}, 5102bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5103bf215546Sopenharmony_ci }, 5104bf215546Sopenharmony_ci { 5105bf215546Sopenharmony_ci /* Four shader engines */ 5106bf215546Sopenharmony_ci {0, {128, 128}}, 5107bf215546Sopenharmony_ci {3, {64, 128}}, 5108bf215546Sopenharmony_ci {5, {16, 128}}, 5109bf215546Sopenharmony_ci {17, {0, 0}}, 5110bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5111bf215546Sopenharmony_ci }, 5112bf215546Sopenharmony_ci }, 5113bf215546Sopenharmony_ci { 5114bf215546Sopenharmony_ci /* Two RB / SE */ 5115bf215546Sopenharmony_ci { 5116bf215546Sopenharmony_ci /* One shader engine */ 5117bf215546Sopenharmony_ci {0, {128, 128}}, 5118bf215546Sopenharmony_ci {2, {64, 128}}, 5119bf215546Sopenharmony_ci {3, {32, 128}}, 5120bf215546Sopenharmony_ci {5, {16, 128}}, 5121bf215546Sopenharmony_ci {33, {0, 0}}, 5122bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5123bf215546Sopenharmony_ci }, 5124bf215546Sopenharmony_ci { 5125bf215546Sopenharmony_ci /* Two shader engines */ 5126bf215546Sopenharmony_ci {0, {128, 128}}, 5127bf215546Sopenharmony_ci {3, {64, 128}}, 5128bf215546Sopenharmony_ci {5, {32, 128}}, 5129bf215546Sopenharmony_ci {9, {16, 128}}, 5130bf215546Sopenharmony_ci 
{33, {0, 0}}, 5131bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5132bf215546Sopenharmony_ci }, 5133bf215546Sopenharmony_ci { 5134bf215546Sopenharmony_ci /* Four shader engines */ 5135bf215546Sopenharmony_ci {0, {256, 256}}, 5136bf215546Sopenharmony_ci {2, {128, 256}}, 5137bf215546Sopenharmony_ci {3, {128, 128}}, 5138bf215546Sopenharmony_ci {5, {64, 128}}, 5139bf215546Sopenharmony_ci {9, {16, 128}}, 5140bf215546Sopenharmony_ci {33, {0, 0}}, 5141bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5142bf215546Sopenharmony_ci }, 5143bf215546Sopenharmony_ci }, 5144bf215546Sopenharmony_ci { 5145bf215546Sopenharmony_ci /* Four RB / SE */ 5146bf215546Sopenharmony_ci { 5147bf215546Sopenharmony_ci /* One shader engine */ 5148bf215546Sopenharmony_ci {0, {128, 256}}, 5149bf215546Sopenharmony_ci {2, {128, 128}}, 5150bf215546Sopenharmony_ci {3, {64, 128}}, 5151bf215546Sopenharmony_ci {5, {32, 128}}, 5152bf215546Sopenharmony_ci {9, {16, 128}}, 5153bf215546Sopenharmony_ci {33, {0, 0}}, 5154bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5155bf215546Sopenharmony_ci }, 5156bf215546Sopenharmony_ci { 5157bf215546Sopenharmony_ci /* Two shader engines */ 5158bf215546Sopenharmony_ci {0, {256, 256}}, 5159bf215546Sopenharmony_ci {2, {128, 256}}, 5160bf215546Sopenharmony_ci {3, {128, 128}}, 5161bf215546Sopenharmony_ci {5, {64, 128}}, 5162bf215546Sopenharmony_ci {9, {32, 128}}, 5163bf215546Sopenharmony_ci {17, {16, 128}}, 5164bf215546Sopenharmony_ci {33, {0, 0}}, 5165bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5166bf215546Sopenharmony_ci }, 5167bf215546Sopenharmony_ci { 5168bf215546Sopenharmony_ci /* Four shader engines */ 5169bf215546Sopenharmony_ci {0, {256, 512}}, 5170bf215546Sopenharmony_ci {2, {256, 256}}, 5171bf215546Sopenharmony_ci {3, {128, 256}}, 5172bf215546Sopenharmony_ci {5, {128, 128}}, 5173bf215546Sopenharmony_ci {9, {64, 128}}, 5174bf215546Sopenharmony_ci {17, {16, 128}}, 5175bf215546Sopenharmony_ci {33, {0, 0}}, 5176bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5177bf215546Sopenharmony_ci }, 
5178bf215546Sopenharmony_ci }, 5179bf215546Sopenharmony_ci }; 5180bf215546Sopenharmony_ci static const struct radv_bin_size_entry ds_size_table[][3][9] = { 5181bf215546Sopenharmony_ci { 5182bf215546Sopenharmony_ci // One RB / SE 5183bf215546Sopenharmony_ci { 5184bf215546Sopenharmony_ci // One shader engine 5185bf215546Sopenharmony_ci {0, {128, 256}}, 5186bf215546Sopenharmony_ci {2, {128, 128}}, 5187bf215546Sopenharmony_ci {4, {64, 128}}, 5188bf215546Sopenharmony_ci {7, {32, 128}}, 5189bf215546Sopenharmony_ci {13, {16, 128}}, 5190bf215546Sopenharmony_ci {49, {0, 0}}, 5191bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5192bf215546Sopenharmony_ci }, 5193bf215546Sopenharmony_ci { 5194bf215546Sopenharmony_ci // Two shader engines 5195bf215546Sopenharmony_ci {0, {256, 256}}, 5196bf215546Sopenharmony_ci {2, {128, 256}}, 5197bf215546Sopenharmony_ci {4, {128, 128}}, 5198bf215546Sopenharmony_ci {7, {64, 128}}, 5199bf215546Sopenharmony_ci {13, {32, 128}}, 5200bf215546Sopenharmony_ci {25, {16, 128}}, 5201bf215546Sopenharmony_ci {49, {0, 0}}, 5202bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5203bf215546Sopenharmony_ci }, 5204bf215546Sopenharmony_ci { 5205bf215546Sopenharmony_ci // Four shader engines 5206bf215546Sopenharmony_ci {0, {256, 512}}, 5207bf215546Sopenharmony_ci {2, {256, 256}}, 5208bf215546Sopenharmony_ci {4, {128, 256}}, 5209bf215546Sopenharmony_ci {7, {128, 128}}, 5210bf215546Sopenharmony_ci {13, {64, 128}}, 5211bf215546Sopenharmony_ci {25, {16, 128}}, 5212bf215546Sopenharmony_ci {49, {0, 0}}, 5213bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5214bf215546Sopenharmony_ci }, 5215bf215546Sopenharmony_ci }, 5216bf215546Sopenharmony_ci { 5217bf215546Sopenharmony_ci // Two RB / SE 5218bf215546Sopenharmony_ci { 5219bf215546Sopenharmony_ci // One shader engine 5220bf215546Sopenharmony_ci {0, {256, 256}}, 5221bf215546Sopenharmony_ci {2, {128, 256}}, 5222bf215546Sopenharmony_ci {4, {128, 128}}, 5223bf215546Sopenharmony_ci {7, {64, 128}}, 5224bf215546Sopenharmony_ci {13, {32, 
128}}, 5225bf215546Sopenharmony_ci {25, {16, 128}}, 5226bf215546Sopenharmony_ci {97, {0, 0}}, 5227bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5228bf215546Sopenharmony_ci }, 5229bf215546Sopenharmony_ci { 5230bf215546Sopenharmony_ci // Two shader engines 5231bf215546Sopenharmony_ci {0, {256, 512}}, 5232bf215546Sopenharmony_ci {2, {256, 256}}, 5233bf215546Sopenharmony_ci {4, {128, 256}}, 5234bf215546Sopenharmony_ci {7, {128, 128}}, 5235bf215546Sopenharmony_ci {13, {64, 128}}, 5236bf215546Sopenharmony_ci {25, {32, 128}}, 5237bf215546Sopenharmony_ci {49, {16, 128}}, 5238bf215546Sopenharmony_ci {97, {0, 0}}, 5239bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5240bf215546Sopenharmony_ci }, 5241bf215546Sopenharmony_ci { 5242bf215546Sopenharmony_ci // Four shader engines 5243bf215546Sopenharmony_ci {0, {512, 512}}, 5244bf215546Sopenharmony_ci {2, {256, 512}}, 5245bf215546Sopenharmony_ci {4, {256, 256}}, 5246bf215546Sopenharmony_ci {7, {128, 256}}, 5247bf215546Sopenharmony_ci {13, {128, 128}}, 5248bf215546Sopenharmony_ci {25, {64, 128}}, 5249bf215546Sopenharmony_ci {49, {16, 128}}, 5250bf215546Sopenharmony_ci {97, {0, 0}}, 5251bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5252bf215546Sopenharmony_ci }, 5253bf215546Sopenharmony_ci }, 5254bf215546Sopenharmony_ci { 5255bf215546Sopenharmony_ci // Four RB / SE 5256bf215546Sopenharmony_ci { 5257bf215546Sopenharmony_ci // One shader engine 5258bf215546Sopenharmony_ci {0, {256, 512}}, 5259bf215546Sopenharmony_ci {2, {256, 256}}, 5260bf215546Sopenharmony_ci {4, {128, 256}}, 5261bf215546Sopenharmony_ci {7, {128, 128}}, 5262bf215546Sopenharmony_ci {13, {64, 128}}, 5263bf215546Sopenharmony_ci {25, {32, 128}}, 5264bf215546Sopenharmony_ci {49, {16, 128}}, 5265bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5266bf215546Sopenharmony_ci }, 5267bf215546Sopenharmony_ci { 5268bf215546Sopenharmony_ci // Two shader engines 5269bf215546Sopenharmony_ci {0, {512, 512}}, 5270bf215546Sopenharmony_ci {2, {256, 512}}, 5271bf215546Sopenharmony_ci {4, {256, 256}}, 
5272bf215546Sopenharmony_ci {7, {128, 256}}, 5273bf215546Sopenharmony_ci {13, {128, 128}}, 5274bf215546Sopenharmony_ci {25, {64, 128}}, 5275bf215546Sopenharmony_ci {49, {32, 128}}, 5276bf215546Sopenharmony_ci {97, {16, 128}}, 5277bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5278bf215546Sopenharmony_ci }, 5279bf215546Sopenharmony_ci { 5280bf215546Sopenharmony_ci // Four shader engines 5281bf215546Sopenharmony_ci {0, {512, 512}}, 5282bf215546Sopenharmony_ci {4, {256, 512}}, 5283bf215546Sopenharmony_ci {7, {256, 256}}, 5284bf215546Sopenharmony_ci {13, {128, 256}}, 5285bf215546Sopenharmony_ci {25, {128, 128}}, 5286bf215546Sopenharmony_ci {49, {64, 128}}, 5287bf215546Sopenharmony_ci {97, {16, 128}}, 5288bf215546Sopenharmony_ci {UINT_MAX, {0, 0}}, 5289bf215546Sopenharmony_ci }, 5290bf215546Sopenharmony_ci }, 5291bf215546Sopenharmony_ci }; 5292bf215546Sopenharmony_ci 5293bf215546Sopenharmony_ci VkExtent2D extent = {512, 512}; 5294bf215546Sopenharmony_ci 5295bf215546Sopenharmony_ci unsigned log_num_rb_per_se = 5296bf215546Sopenharmony_ci util_logbase2_ceil(pdevice->rad_info.max_render_backends / pdevice->rad_info.max_se); 5297bf215546Sopenharmony_ci unsigned log_num_se = util_logbase2_ceil(pdevice->rad_info.max_se); 5298bf215546Sopenharmony_ci 5299bf215546Sopenharmony_ci unsigned total_samples = 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->ms.pa_sc_aa_config); 5300bf215546Sopenharmony_ci unsigned ps_iter_samples = 1u << G_028804_PS_ITER_SAMPLES(pipeline->ms.db_eqaa); 5301bf215546Sopenharmony_ci unsigned effective_samples = total_samples; 5302bf215546Sopenharmony_ci unsigned color_bytes_per_pixel = 0; 5303bf215546Sopenharmony_ci 5304bf215546Sopenharmony_ci for (unsigned i = 0; i < info->ri.color_att_count; i++) { 5305bf215546Sopenharmony_ci if (!info->cb.att[i].color_write_mask) 5306bf215546Sopenharmony_ci continue; 5307bf215546Sopenharmony_ci 5308bf215546Sopenharmony_ci if (info->ri.color_att_formats[i] == VK_FORMAT_UNDEFINED) 5309bf215546Sopenharmony_ci continue; 
5310bf215546Sopenharmony_ci 5311bf215546Sopenharmony_ci color_bytes_per_pixel += vk_format_get_blocksize(info->ri.color_att_formats[i]); 5312bf215546Sopenharmony_ci } 5313bf215546Sopenharmony_ci 5314bf215546Sopenharmony_ci /* MSAA images typically don't use all samples all the time. */ 5315bf215546Sopenharmony_ci if (effective_samples >= 2 && ps_iter_samples <= 1) 5316bf215546Sopenharmony_ci effective_samples = 2; 5317bf215546Sopenharmony_ci color_bytes_per_pixel *= effective_samples; 5318bf215546Sopenharmony_ci 5319bf215546Sopenharmony_ci const struct radv_bin_size_entry *color_entry = color_size_table[log_num_rb_per_se][log_num_se]; 5320bf215546Sopenharmony_ci while (color_entry[1].bpp <= color_bytes_per_pixel) 5321bf215546Sopenharmony_ci ++color_entry; 5322bf215546Sopenharmony_ci 5323bf215546Sopenharmony_ci extent = color_entry->extent; 5324bf215546Sopenharmony_ci 5325bf215546Sopenharmony_ci if (radv_pipeline_has_ds_attachments(&info->ri)) { 5326bf215546Sopenharmony_ci /* Coefficients taken from AMDVLK */ 5327bf215546Sopenharmony_ci unsigned depth_coeff = info->ri.depth_att_format != VK_FORMAT_UNDEFINED ? 5 : 0; 5328bf215546Sopenharmony_ci unsigned stencil_coeff = info->ri.stencil_att_format != VK_FORMAT_UNDEFINED ? 
1 : 0; 5329bf215546Sopenharmony_ci unsigned ds_bytes_per_pixel = 4 * (depth_coeff + stencil_coeff) * total_samples; 5330bf215546Sopenharmony_ci 5331bf215546Sopenharmony_ci const struct radv_bin_size_entry *ds_entry = ds_size_table[log_num_rb_per_se][log_num_se]; 5332bf215546Sopenharmony_ci while (ds_entry[1].bpp <= ds_bytes_per_pixel) 5333bf215546Sopenharmony_ci ++ds_entry; 5334bf215546Sopenharmony_ci 5335bf215546Sopenharmony_ci if (ds_entry->extent.width * ds_entry->extent.height < extent.width * extent.height) 5336bf215546Sopenharmony_ci extent = ds_entry->extent; 5337bf215546Sopenharmony_ci } 5338bf215546Sopenharmony_ci 5339bf215546Sopenharmony_ci return extent; 5340bf215546Sopenharmony_ci} 5341bf215546Sopenharmony_ci 5342bf215546Sopenharmony_cistatic VkExtent2D 5343bf215546Sopenharmony_ciradv_gfx10_compute_bin_size(const struct radv_graphics_pipeline *pipeline, 5344bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 5345bf215546Sopenharmony_ci{ 5346bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5347bf215546Sopenharmony_ci VkExtent2D extent = {512, 512}; 5348bf215546Sopenharmony_ci 5349bf215546Sopenharmony_ci const unsigned db_tag_size = 64; 5350bf215546Sopenharmony_ci const unsigned db_tag_count = 312; 5351bf215546Sopenharmony_ci const unsigned color_tag_size = 1024; 5352bf215546Sopenharmony_ci const unsigned color_tag_count = 31; 5353bf215546Sopenharmony_ci const unsigned fmask_tag_size = 256; 5354bf215546Sopenharmony_ci const unsigned fmask_tag_count = 44; 5355bf215546Sopenharmony_ci 5356bf215546Sopenharmony_ci const unsigned rb_count = pdevice->rad_info.max_render_backends; 5357bf215546Sopenharmony_ci const unsigned pipe_count = MAX2(rb_count, pdevice->rad_info.num_tcc_blocks); 5358bf215546Sopenharmony_ci 5359bf215546Sopenharmony_ci const unsigned db_tag_part = (db_tag_count * rb_count / pipe_count) * db_tag_size * pipe_count; 5360bf215546Sopenharmony_ci const unsigned 
color_tag_part = 5361bf215546Sopenharmony_ci (color_tag_count * rb_count / pipe_count) * color_tag_size * pipe_count; 5362bf215546Sopenharmony_ci const unsigned fmask_tag_part = 5363bf215546Sopenharmony_ci (fmask_tag_count * rb_count / pipe_count) * fmask_tag_size * pipe_count; 5364bf215546Sopenharmony_ci 5365bf215546Sopenharmony_ci const unsigned total_samples = 5366bf215546Sopenharmony_ci 1u << G_028BE0_MSAA_NUM_SAMPLES(pipeline->ms.pa_sc_aa_config); 5367bf215546Sopenharmony_ci const unsigned samples_log = util_logbase2_ceil(total_samples); 5368bf215546Sopenharmony_ci 5369bf215546Sopenharmony_ci unsigned color_bytes_per_pixel = 0; 5370bf215546Sopenharmony_ci unsigned fmask_bytes_per_pixel = 0; 5371bf215546Sopenharmony_ci 5372bf215546Sopenharmony_ci for (unsigned i = 0; i < info->ri.color_att_count; i++) { 5373bf215546Sopenharmony_ci if (!info->cb.att[i].color_write_mask) 5374bf215546Sopenharmony_ci continue; 5375bf215546Sopenharmony_ci 5376bf215546Sopenharmony_ci if (info->ri.color_att_formats[i] == VK_FORMAT_UNDEFINED) 5377bf215546Sopenharmony_ci continue; 5378bf215546Sopenharmony_ci 5379bf215546Sopenharmony_ci color_bytes_per_pixel += vk_format_get_blocksize(info->ri.color_att_formats[i]); 5380bf215546Sopenharmony_ci 5381bf215546Sopenharmony_ci if (total_samples > 1) { 5382bf215546Sopenharmony_ci assert(samples_log <= 3); 5383bf215546Sopenharmony_ci const unsigned fmask_array[] = {0, 1, 1, 4}; 5384bf215546Sopenharmony_ci fmask_bytes_per_pixel += fmask_array[samples_log]; 5385bf215546Sopenharmony_ci } 5386bf215546Sopenharmony_ci } 5387bf215546Sopenharmony_ci 5388bf215546Sopenharmony_ci color_bytes_per_pixel *= total_samples; 5389bf215546Sopenharmony_ci color_bytes_per_pixel = MAX2(color_bytes_per_pixel, 1); 5390bf215546Sopenharmony_ci 5391bf215546Sopenharmony_ci const unsigned color_pixel_count_log = util_logbase2(color_tag_part / color_bytes_per_pixel); 5392bf215546Sopenharmony_ci extent.width = 1ull << ((color_pixel_count_log + 1) / 2); 
5393bf215546Sopenharmony_ci extent.height = 1ull << (color_pixel_count_log / 2); 5394bf215546Sopenharmony_ci 5395bf215546Sopenharmony_ci if (fmask_bytes_per_pixel) { 5396bf215546Sopenharmony_ci const unsigned fmask_pixel_count_log = util_logbase2(fmask_tag_part / fmask_bytes_per_pixel); 5397bf215546Sopenharmony_ci 5398bf215546Sopenharmony_ci const VkExtent2D fmask_extent = 5399bf215546Sopenharmony_ci (VkExtent2D){.width = 1ull << ((fmask_pixel_count_log + 1) / 2), 5400bf215546Sopenharmony_ci .height = 1ull << (color_pixel_count_log / 2)}; 5401bf215546Sopenharmony_ci 5402bf215546Sopenharmony_ci if (fmask_extent.width * fmask_extent.height < extent.width * extent.height) 5403bf215546Sopenharmony_ci extent = fmask_extent; 5404bf215546Sopenharmony_ci } 5405bf215546Sopenharmony_ci 5406bf215546Sopenharmony_ci if (radv_pipeline_has_ds_attachments(&info->ri)) { 5407bf215546Sopenharmony_ci /* Coefficients taken from AMDVLK */ 5408bf215546Sopenharmony_ci unsigned depth_coeff = info->ri.depth_att_format != VK_FORMAT_UNDEFINED ? 5 : 0; 5409bf215546Sopenharmony_ci unsigned stencil_coeff = info->ri.stencil_att_format != VK_FORMAT_UNDEFINED ? 
1 : 0; 5410bf215546Sopenharmony_ci unsigned db_bytes_per_pixel = (depth_coeff + stencil_coeff) * total_samples; 5411bf215546Sopenharmony_ci 5412bf215546Sopenharmony_ci const unsigned db_pixel_count_log = util_logbase2(db_tag_part / db_bytes_per_pixel); 5413bf215546Sopenharmony_ci 5414bf215546Sopenharmony_ci const VkExtent2D db_extent = (VkExtent2D){.width = 1ull << ((db_pixel_count_log + 1) / 2), 5415bf215546Sopenharmony_ci .height = 1ull << (color_pixel_count_log / 2)}; 5416bf215546Sopenharmony_ci 5417bf215546Sopenharmony_ci if (db_extent.width * db_extent.height < extent.width * extent.height) 5418bf215546Sopenharmony_ci extent = db_extent; 5419bf215546Sopenharmony_ci } 5420bf215546Sopenharmony_ci 5421bf215546Sopenharmony_ci extent.width = MAX2(extent.width, 128); 5422bf215546Sopenharmony_ci extent.height = MAX2(extent.width, 64); 5423bf215546Sopenharmony_ci 5424bf215546Sopenharmony_ci return extent; 5425bf215546Sopenharmony_ci} 5426bf215546Sopenharmony_ci 5427bf215546Sopenharmony_cistatic void 5428bf215546Sopenharmony_ciradv_pipeline_init_disabled_binning_state(struct radv_graphics_pipeline *pipeline, 5429bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 5430bf215546Sopenharmony_ci{ 5431bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5432bf215546Sopenharmony_ci uint32_t pa_sc_binner_cntl_0 = S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_LEGACY_SC) | 5433bf215546Sopenharmony_ci S_028C44_DISABLE_START_OF_PRIM(1); 5434bf215546Sopenharmony_ci 5435bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 5436bf215546Sopenharmony_ci unsigned min_bytes_per_pixel = 0; 5437bf215546Sopenharmony_ci 5438bf215546Sopenharmony_ci for (unsigned i = 0; i < info->ri.color_att_count; i++) { 5439bf215546Sopenharmony_ci if (!info->cb.att[i].color_write_mask) 5440bf215546Sopenharmony_ci continue; 5441bf215546Sopenharmony_ci 5442bf215546Sopenharmony_ci if (info->ri.color_att_formats[i] == 
VK_FORMAT_UNDEFINED) 5443bf215546Sopenharmony_ci continue; 5444bf215546Sopenharmony_ci 5445bf215546Sopenharmony_ci unsigned bytes = vk_format_get_blocksize(info->ri.color_att_formats[i]); 5446bf215546Sopenharmony_ci if (!min_bytes_per_pixel || bytes < min_bytes_per_pixel) 5447bf215546Sopenharmony_ci min_bytes_per_pixel = bytes; 5448bf215546Sopenharmony_ci } 5449bf215546Sopenharmony_ci 5450bf215546Sopenharmony_ci pa_sc_binner_cntl_0 = 5451bf215546Sopenharmony_ci S_028C44_BINNING_MODE(V_028C44_DISABLE_BINNING_USE_NEW_SC) | S_028C44_BIN_SIZE_X(0) | 5452bf215546Sopenharmony_ci S_028C44_BIN_SIZE_Y(0) | S_028C44_BIN_SIZE_X_EXTEND(2) | /* 128 */ 5453bf215546Sopenharmony_ci S_028C44_BIN_SIZE_Y_EXTEND(min_bytes_per_pixel <= 4 ? 2 : 1) | /* 128 or 64 */ 5454bf215546Sopenharmony_ci S_028C44_DISABLE_START_OF_PRIM(1); 5455bf215546Sopenharmony_ci } 5456bf215546Sopenharmony_ci 5457bf215546Sopenharmony_ci pipeline->binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0; 5458bf215546Sopenharmony_ci} 5459bf215546Sopenharmony_ci 5460bf215546Sopenharmony_cistruct radv_binning_settings 5461bf215546Sopenharmony_ciradv_get_binning_settings(const struct radv_physical_device *pdev) 5462bf215546Sopenharmony_ci{ 5463bf215546Sopenharmony_ci struct radv_binning_settings settings; 5464bf215546Sopenharmony_ci if (pdev->rad_info.has_dedicated_vram) { 5465bf215546Sopenharmony_ci if (pdev->rad_info.max_render_backends > 4) { 5466bf215546Sopenharmony_ci settings.context_states_per_bin = 1; 5467bf215546Sopenharmony_ci settings.persistent_states_per_bin = 1; 5468bf215546Sopenharmony_ci } else { 5469bf215546Sopenharmony_ci settings.context_states_per_bin = 3; 5470bf215546Sopenharmony_ci settings.persistent_states_per_bin = 8; 5471bf215546Sopenharmony_ci } 5472bf215546Sopenharmony_ci settings.fpovs_per_batch = 63; 5473bf215546Sopenharmony_ci } else { 5474bf215546Sopenharmony_ci /* The context states are affected by the scissor bug. 
*/ 5475bf215546Sopenharmony_ci settings.context_states_per_bin = 6; 5476bf215546Sopenharmony_ci /* 32 causes hangs for RAVEN. */ 5477bf215546Sopenharmony_ci settings.persistent_states_per_bin = 16; 5478bf215546Sopenharmony_ci settings.fpovs_per_batch = 63; 5479bf215546Sopenharmony_ci } 5480bf215546Sopenharmony_ci 5481bf215546Sopenharmony_ci if (pdev->rad_info.has_gfx9_scissor_bug) 5482bf215546Sopenharmony_ci settings.context_states_per_bin = 1; 5483bf215546Sopenharmony_ci 5484bf215546Sopenharmony_ci return settings; 5485bf215546Sopenharmony_ci} 5486bf215546Sopenharmony_ci 5487bf215546Sopenharmony_cistatic void 5488bf215546Sopenharmony_ciradv_pipeline_init_binning_state(struct radv_graphics_pipeline *pipeline, 5489bf215546Sopenharmony_ci const struct radv_blend_state *blend, 5490bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 5491bf215546Sopenharmony_ci{ 5492bf215546Sopenharmony_ci const struct radv_device *device = pipeline->base.device; 5493bf215546Sopenharmony_ci 5494bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level < GFX9) 5495bf215546Sopenharmony_ci return; 5496bf215546Sopenharmony_ci 5497bf215546Sopenharmony_ci VkExtent2D bin_size; 5498bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX10) { 5499bf215546Sopenharmony_ci bin_size = radv_gfx10_compute_bin_size(pipeline, info); 5500bf215546Sopenharmony_ci } else if (device->physical_device->rad_info.gfx_level == GFX9) { 5501bf215546Sopenharmony_ci bin_size = radv_gfx9_compute_bin_size(pipeline, info); 5502bf215546Sopenharmony_ci } else 5503bf215546Sopenharmony_ci unreachable("Unhandled generation for binning bin size calculation"); 5504bf215546Sopenharmony_ci 5505bf215546Sopenharmony_ci if (device->pbb_allowed && bin_size.width && bin_size.height) { 5506bf215546Sopenharmony_ci struct radv_binning_settings settings = radv_get_binning_settings(device->physical_device); 5507bf215546Sopenharmony_ci 5508bf215546Sopenharmony_ci const uint32_t 
pa_sc_binner_cntl_0 = 5509bf215546Sopenharmony_ci S_028C44_BINNING_MODE(V_028C44_BINNING_ALLOWED) | 5510bf215546Sopenharmony_ci S_028C44_BIN_SIZE_X(bin_size.width == 16) | S_028C44_BIN_SIZE_Y(bin_size.height == 16) | 5511bf215546Sopenharmony_ci S_028C44_BIN_SIZE_X_EXTEND(util_logbase2(MAX2(bin_size.width, 32)) - 5) | 5512bf215546Sopenharmony_ci S_028C44_BIN_SIZE_Y_EXTEND(util_logbase2(MAX2(bin_size.height, 32)) - 5) | 5513bf215546Sopenharmony_ci S_028C44_CONTEXT_STATES_PER_BIN(settings.context_states_per_bin - 1) | 5514bf215546Sopenharmony_ci S_028C44_PERSISTENT_STATES_PER_BIN(settings.persistent_states_per_bin - 1) | 5515bf215546Sopenharmony_ci S_028C44_DISABLE_START_OF_PRIM(1) | 5516bf215546Sopenharmony_ci S_028C44_FPOVS_PER_BATCH(settings.fpovs_per_batch) | S_028C44_OPTIMAL_BIN_SELECTION(1); 5517bf215546Sopenharmony_ci 5518bf215546Sopenharmony_ci pipeline->binning.pa_sc_binner_cntl_0 = pa_sc_binner_cntl_0; 5519bf215546Sopenharmony_ci } else 5520bf215546Sopenharmony_ci radv_pipeline_init_disabled_binning_state(pipeline, info); 5521bf215546Sopenharmony_ci} 5522bf215546Sopenharmony_ci 5523bf215546Sopenharmony_cistatic void 5524bf215546Sopenharmony_ciradv_pipeline_emit_depth_stencil_state(struct radeon_cmdbuf *ctx_cs, 5525bf215546Sopenharmony_ci const struct radv_depth_stencil_state *ds_state) 5526bf215546Sopenharmony_ci{ 5527bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028000_DB_RENDER_CONTROL, ds_state->db_render_control); 5528bf215546Sopenharmony_ci 5529bf215546Sopenharmony_ci radeon_set_context_reg_seq(ctx_cs, R_02800C_DB_RENDER_OVERRIDE, 2); 5530bf215546Sopenharmony_ci radeon_emit(ctx_cs, ds_state->db_render_override); 5531bf215546Sopenharmony_ci radeon_emit(ctx_cs, ds_state->db_render_override2); 5532bf215546Sopenharmony_ci} 5533bf215546Sopenharmony_ci 5534bf215546Sopenharmony_cistatic void 5535bf215546Sopenharmony_ciradv_pipeline_emit_blend_state(struct radeon_cmdbuf *ctx_cs, 5536bf215546Sopenharmony_ci const struct radv_graphics_pipeline 
*pipeline, 5537bf215546Sopenharmony_ci const struct radv_blend_state *blend) 5538bf215546Sopenharmony_ci{ 5539bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5540bf215546Sopenharmony_ci 5541bf215546Sopenharmony_ci radeon_set_context_reg_seq(ctx_cs, R_028780_CB_BLEND0_CONTROL, 8); 5542bf215546Sopenharmony_ci radeon_emit_array(ctx_cs, blend->cb_blend_control, 8); 5543bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028B70_DB_ALPHA_TO_MASK, blend->db_alpha_to_mask); 5544bf215546Sopenharmony_ci 5545bf215546Sopenharmony_ci if (pdevice->rad_info.has_rbplus) { 5546bf215546Sopenharmony_ci 5547bf215546Sopenharmony_ci radeon_set_context_reg_seq(ctx_cs, R_028760_SX_MRT0_BLEND_OPT, 8); 5548bf215546Sopenharmony_ci radeon_emit_array(ctx_cs, blend->sx_mrt_blend_opt, 8); 5549bf215546Sopenharmony_ci } 5550bf215546Sopenharmony_ci 5551bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028714_SPI_SHADER_COL_FORMAT, blend->spi_shader_col_format); 5552bf215546Sopenharmony_ci 5553bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_02823C_CB_SHADER_MASK, blend->cb_shader_mask); 5554bf215546Sopenharmony_ci} 5555bf215546Sopenharmony_ci 5556bf215546Sopenharmony_cistatic void 5557bf215546Sopenharmony_ciradv_pipeline_emit_raster_state(struct radeon_cmdbuf *ctx_cs, 5558bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline, 5559bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 5560bf215546Sopenharmony_ci{ 5561bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5562bf215546Sopenharmony_ci const VkConservativeRasterizationModeEXT mode = info->rs.conservative_mode; 5563bf215546Sopenharmony_ci uint32_t pa_sc_conservative_rast = S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1); 5564bf215546Sopenharmony_ci 5565bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX9) { 5566bf215546Sopenharmony_ci /* Conservative rasterization. 
*/ 5567bf215546Sopenharmony_ci if (mode != VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT) { 5568bf215546Sopenharmony_ci pa_sc_conservative_rast = S_028C4C_PREZ_AA_MASK_ENABLE(1) | S_028C4C_POSTZ_AA_MASK_ENABLE(1) | 5569bf215546Sopenharmony_ci S_028C4C_CENTROID_SAMPLE_OVERRIDE(1); 5570bf215546Sopenharmony_ci 5571bf215546Sopenharmony_ci if (mode == VK_CONSERVATIVE_RASTERIZATION_MODE_OVERESTIMATE_EXT) { 5572bf215546Sopenharmony_ci pa_sc_conservative_rast |= 5573bf215546Sopenharmony_ci S_028C4C_OVER_RAST_ENABLE(1) | S_028C4C_OVER_RAST_SAMPLE_SELECT(0) | 5574bf215546Sopenharmony_ci S_028C4C_UNDER_RAST_ENABLE(0) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(1) | 5575bf215546Sopenharmony_ci S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(1); 5576bf215546Sopenharmony_ci } else { 5577bf215546Sopenharmony_ci assert(mode == VK_CONSERVATIVE_RASTERIZATION_MODE_UNDERESTIMATE_EXT); 5578bf215546Sopenharmony_ci pa_sc_conservative_rast |= 5579bf215546Sopenharmony_ci S_028C4C_OVER_RAST_ENABLE(0) | S_028C4C_OVER_RAST_SAMPLE_SELECT(1) | 5580bf215546Sopenharmony_ci S_028C4C_UNDER_RAST_ENABLE(1) | S_028C4C_UNDER_RAST_SAMPLE_SELECT(0) | 5581bf215546Sopenharmony_ci S_028C4C_PBB_UNCERTAINTY_REGION_ENABLE(0); 5582bf215546Sopenharmony_ci } 5583bf215546Sopenharmony_ci } 5584bf215546Sopenharmony_ci 5585bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 5586bf215546Sopenharmony_ci pa_sc_conservative_rast); 5587bf215546Sopenharmony_ci } 5588bf215546Sopenharmony_ci} 5589bf215546Sopenharmony_ci 5590bf215546Sopenharmony_cistatic void 5591bf215546Sopenharmony_ciradv_pipeline_emit_multisample_state(struct radeon_cmdbuf *ctx_cs, 5592bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 5593bf215546Sopenharmony_ci{ 5594bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5595bf215546Sopenharmony_ci const struct radv_multisample_state *ms = &pipeline->ms; 5596bf215546Sopenharmony_ci 
5597bf215546Sopenharmony_ci radeon_set_context_reg_seq(ctx_cs, R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 5598bf215546Sopenharmony_ci radeon_emit(ctx_cs, ms->pa_sc_aa_mask[0]); 5599bf215546Sopenharmony_ci radeon_emit(ctx_cs, ms->pa_sc_aa_mask[1]); 5600bf215546Sopenharmony_ci 5601bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028804_DB_EQAA, ms->db_eqaa); 5602bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028BE0_PA_SC_AA_CONFIG, ms->pa_sc_aa_config); 5603bf215546Sopenharmony_ci 5604bf215546Sopenharmony_ci radeon_set_context_reg_seq(ctx_cs, R_028A48_PA_SC_MODE_CNTL_0, 2); 5605bf215546Sopenharmony_ci radeon_emit(ctx_cs, ms->pa_sc_mode_cntl_0); 5606bf215546Sopenharmony_ci radeon_emit(ctx_cs, ms->pa_sc_mode_cntl_1); 5607bf215546Sopenharmony_ci 5608bf215546Sopenharmony_ci /* The exclusion bits can be set to improve rasterization efficiency 5609bf215546Sopenharmony_ci * if no sample lies on the pixel boundary (-8 sample offset). It's 5610bf215546Sopenharmony_ci * currently always TRUE because the driver doesn't support 16 samples. 
5611bf215546Sopenharmony_ci */ 5612bf215546Sopenharmony_ci bool exclusion = pdevice->rad_info.gfx_level >= GFX7; 5613bf215546Sopenharmony_ci radeon_set_context_reg( 5614bf215546Sopenharmony_ci ctx_cs, R_02882C_PA_SU_PRIM_FILTER_CNTL, 5615bf215546Sopenharmony_ci S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); 5616bf215546Sopenharmony_ci} 5617bf215546Sopenharmony_ci 5618bf215546Sopenharmony_cistatic void 5619bf215546Sopenharmony_ciradv_pipeline_emit_vgt_gs_mode(struct radeon_cmdbuf *ctx_cs, 5620bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 5621bf215546Sopenharmony_ci{ 5622bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5623bf215546Sopenharmony_ci const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); 5624bf215546Sopenharmony_ci const struct radv_shader *vs = pipeline->base.shaders[MESA_SHADER_TESS_EVAL] 5625bf215546Sopenharmony_ci ? pipeline->base.shaders[MESA_SHADER_TESS_EVAL] 5626bf215546Sopenharmony_ci : pipeline->base.shaders[MESA_SHADER_VERTEX]; 5627bf215546Sopenharmony_ci unsigned vgt_primitiveid_en = 0; 5628bf215546Sopenharmony_ci uint32_t vgt_gs_mode = 0; 5629bf215546Sopenharmony_ci 5630bf215546Sopenharmony_ci if (radv_pipeline_has_ngg(pipeline)) 5631bf215546Sopenharmony_ci return; 5632bf215546Sopenharmony_ci 5633bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { 5634bf215546Sopenharmony_ci const struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY]; 5635bf215546Sopenharmony_ci 5636bf215546Sopenharmony_ci vgt_gs_mode = ac_vgt_gs_mode(gs->info.gs.vertices_out, pdevice->rad_info.gfx_level); 5637bf215546Sopenharmony_ci } else if (outinfo->export_prim_id || vs->info.uses_prim_id) { 5638bf215546Sopenharmony_ci vgt_gs_mode = S_028A40_MODE(V_028A40_GS_SCENARIO_A); 5639bf215546Sopenharmony_ci vgt_primitiveid_en |= S_028A84_PRIMITIVEID_EN(1); 5640bf215546Sopenharmony_ci } 
5641bf215546Sopenharmony_ci 5642bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, vgt_primitiveid_en); 5643bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028A40_VGT_GS_MODE, vgt_gs_mode); 5644bf215546Sopenharmony_ci} 5645bf215546Sopenharmony_ci 5646bf215546Sopenharmony_cistatic void 5647bf215546Sopenharmony_ciradv_pipeline_emit_hw_vs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 5648bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline, const struct radv_shader *shader) 5649bf215546Sopenharmony_ci{ 5650bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5651bf215546Sopenharmony_ci uint64_t va = radv_shader_get_va(shader); 5652bf215546Sopenharmony_ci 5653bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B120_SPI_SHADER_PGM_LO_VS, 4); 5654bf215546Sopenharmony_ci radeon_emit(cs, va >> 8); 5655bf215546Sopenharmony_ci radeon_emit(cs, S_00B124_MEM_BASE(va >> 40)); 5656bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc1); 5657bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc2); 5658bf215546Sopenharmony_ci 5659bf215546Sopenharmony_ci const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); 5660bf215546Sopenharmony_ci unsigned clip_dist_mask, cull_dist_mask, total_mask; 5661bf215546Sopenharmony_ci clip_dist_mask = outinfo->clip_dist_mask; 5662bf215546Sopenharmony_ci cull_dist_mask = outinfo->cull_dist_mask; 5663bf215546Sopenharmony_ci total_mask = clip_dist_mask | cull_dist_mask; 5664bf215546Sopenharmony_ci 5665bf215546Sopenharmony_ci bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || 5666bf215546Sopenharmony_ci outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate; 5667bf215546Sopenharmony_ci unsigned spi_vs_out_config, nparams; 5668bf215546Sopenharmony_ci 5669bf215546Sopenharmony_ci /* VS is required to export at least one param. 
*/ 5670bf215546Sopenharmony_ci nparams = MAX2(outinfo->param_exports, 1); 5671bf215546Sopenharmony_ci spi_vs_out_config = S_0286C4_VS_EXPORT_COUNT(nparams - 1); 5672bf215546Sopenharmony_ci 5673bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 5674bf215546Sopenharmony_ci spi_vs_out_config |= S_0286C4_NO_PC_EXPORT(outinfo->param_exports == 0); 5675bf215546Sopenharmony_ci } 5676bf215546Sopenharmony_ci 5677bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, spi_vs_out_config); 5678bf215546Sopenharmony_ci 5679bf215546Sopenharmony_ci radeon_set_context_reg( 5680bf215546Sopenharmony_ci ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, 5681bf215546Sopenharmony_ci S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | 5682bf215546Sopenharmony_ci S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP 5683bf215546Sopenharmony_ci : V_02870C_SPI_SHADER_NONE) | 5684bf215546Sopenharmony_ci S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP 5685bf215546Sopenharmony_ci : V_02870C_SPI_SHADER_NONE) | 5686bf215546Sopenharmony_ci S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? 
V_02870C_SPI_SHADER_4COMP 5687bf215546Sopenharmony_ci : V_02870C_SPI_SHADER_NONE)); 5688bf215546Sopenharmony_ci 5689bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, 5690bf215546Sopenharmony_ci S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | 5691bf215546Sopenharmony_ci S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | 5692bf215546Sopenharmony_ci S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | 5693bf215546Sopenharmony_ci S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) | 5694bf215546Sopenharmony_ci S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | 5695bf215546Sopenharmony_ci S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | 5696bf215546Sopenharmony_ci S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | 5697bf215546Sopenharmony_ci S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | 5698bf215546Sopenharmony_ci total_mask << 8 | clip_dist_mask); 5699bf215546Sopenharmony_ci 5700bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level <= GFX8) 5701bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028AB4_VGT_REUSE_OFF, outinfo->writes_viewport_index); 5702bf215546Sopenharmony_ci 5703bf215546Sopenharmony_ci unsigned late_alloc_wave64, cu_mask; 5704bf215546Sopenharmony_ci ac_compute_late_alloc(&pdevice->rad_info, false, false, shader->config.scratch_bytes_per_wave > 0, 5705bf215546Sopenharmony_ci &late_alloc_wave64, &cu_mask); 5706bf215546Sopenharmony_ci 5707bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX7) { 5708bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 5709bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 5710bf215546Sopenharmony_ci S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F), 5711bf215546Sopenharmony_ci C_00B118_CU_EN, 0, &pdevice->rad_info, 5712bf215546Sopenharmony_ci (void*)gfx10_set_sh_reg_idx3); 5713bf215546Sopenharmony_ci } else { 5714bf215546Sopenharmony_ci 
radeon_set_sh_reg_idx(pdevice, cs, R_00B118_SPI_SHADER_PGM_RSRC3_VS, 3, 5715bf215546Sopenharmony_ci S_00B118_CU_EN(cu_mask) | S_00B118_WAVE_LIMIT(0x3F)); 5716bf215546Sopenharmony_ci } 5717bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B11C_SPI_SHADER_LATE_ALLOC_VS, S_00B11C_LIMIT(late_alloc_wave64)); 5718bf215546Sopenharmony_ci } 5719bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 5720bf215546Sopenharmony_ci uint32_t oversub_pc_lines = late_alloc_wave64 ? pdevice->rad_info.pc_lines / 4 : 0; 5721bf215546Sopenharmony_ci gfx10_emit_ge_pc_alloc(cs, pdevice->rad_info.gfx_level, oversub_pc_lines); 5722bf215546Sopenharmony_ci } 5723bf215546Sopenharmony_ci} 5724bf215546Sopenharmony_ci 5725bf215546Sopenharmony_cistatic void 5726bf215546Sopenharmony_ciradv_pipeline_emit_hw_es(struct radeon_cmdbuf *cs, const struct radv_graphics_pipeline *pipeline, 5727bf215546Sopenharmony_ci const struct radv_shader *shader) 5728bf215546Sopenharmony_ci{ 5729bf215546Sopenharmony_ci uint64_t va = radv_shader_get_va(shader); 5730bf215546Sopenharmony_ci 5731bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B320_SPI_SHADER_PGM_LO_ES, 4); 5732bf215546Sopenharmony_ci radeon_emit(cs, va >> 8); 5733bf215546Sopenharmony_ci radeon_emit(cs, S_00B324_MEM_BASE(va >> 40)); 5734bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc1); 5735bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc2); 5736bf215546Sopenharmony_ci} 5737bf215546Sopenharmony_ci 5738bf215546Sopenharmony_cistatic void 5739bf215546Sopenharmony_ciradv_pipeline_emit_hw_ls(struct radeon_cmdbuf *cs, const struct radv_graphics_pipeline *pipeline, 5740bf215546Sopenharmony_ci const struct radv_shader *shader) 5741bf215546Sopenharmony_ci{ 5742bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5743bf215546Sopenharmony_ci unsigned num_lds_blocks = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.tcs.num_lds_blocks; 5744bf215546Sopenharmony_ci 
uint64_t va = radv_shader_get_va(shader); 5745bf215546Sopenharmony_ci uint32_t rsrc2 = shader->config.rsrc2; 5746bf215546Sopenharmony_ci 5747bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); 5748bf215546Sopenharmony_ci 5749bf215546Sopenharmony_ci rsrc2 |= S_00B52C_LDS_SIZE(num_lds_blocks); 5750bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level == GFX7 && pdevice->rad_info.family != CHIP_HAWAII) 5751bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B52C_SPI_SHADER_PGM_RSRC2_LS, rsrc2); 5752bf215546Sopenharmony_ci 5753bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B528_SPI_SHADER_PGM_RSRC1_LS, 2); 5754bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc1); 5755bf215546Sopenharmony_ci radeon_emit(cs, rsrc2); 5756bf215546Sopenharmony_ci} 5757bf215546Sopenharmony_ci 5758bf215546Sopenharmony_cistatic void 5759bf215546Sopenharmony_ciradv_pipeline_emit_hw_ngg(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 5760bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline, 5761bf215546Sopenharmony_ci const struct radv_shader *shader) 5762bf215546Sopenharmony_ci{ 5763bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5764bf215546Sopenharmony_ci uint64_t va = radv_shader_get_va(shader); 5765bf215546Sopenharmony_ci gl_shader_stage es_type = 5766bf215546Sopenharmony_ci radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH) ? MESA_SHADER_MESH : 5767bf215546Sopenharmony_ci radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) ? 
MESA_SHADER_TESS_EVAL : MESA_SHADER_VERTEX; 5768bf215546Sopenharmony_ci struct radv_shader *es = pipeline->base.shaders[es_type]; 5769bf215546Sopenharmony_ci const struct gfx10_ngg_info *ngg_state = &shader->info.ngg_info; 5770bf215546Sopenharmony_ci 5771bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); 5772bf215546Sopenharmony_ci 5773bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2); 5774bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc1); 5775bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc2); 5776bf215546Sopenharmony_ci 5777bf215546Sopenharmony_ci const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); 5778bf215546Sopenharmony_ci unsigned clip_dist_mask, cull_dist_mask, total_mask; 5779bf215546Sopenharmony_ci clip_dist_mask = outinfo->clip_dist_mask; 5780bf215546Sopenharmony_ci cull_dist_mask = outinfo->cull_dist_mask; 5781bf215546Sopenharmony_ci total_mask = clip_dist_mask | cull_dist_mask; 5782bf215546Sopenharmony_ci 5783bf215546Sopenharmony_ci bool misc_vec_ena = outinfo->writes_pointsize || outinfo->writes_layer || 5784bf215546Sopenharmony_ci outinfo->writes_viewport_index || outinfo->writes_primitive_shading_rate; 5785bf215546Sopenharmony_ci bool es_enable_prim_id = outinfo->export_prim_id || (es && es->info.uses_prim_id); 5786bf215546Sopenharmony_ci bool break_wave_at_eoi = false; 5787bf215546Sopenharmony_ci unsigned ge_cntl; 5788bf215546Sopenharmony_ci 5789bf215546Sopenharmony_ci if (es_type == MESA_SHADER_TESS_EVAL) { 5790bf215546Sopenharmony_ci struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY]; 5791bf215546Sopenharmony_ci 5792bf215546Sopenharmony_ci if (es_enable_prim_id || (gs && gs->info.uses_prim_id)) 5793bf215546Sopenharmony_ci break_wave_at_eoi = true; 5794bf215546Sopenharmony_ci } 5795bf215546Sopenharmony_ci 5796bf215546Sopenharmony_ci bool no_pc_export = outinfo->param_exports == 0 && outinfo->prim_param_exports == 
0; 5797bf215546Sopenharmony_ci unsigned num_params = MAX2(outinfo->param_exports, 1); 5798bf215546Sopenharmony_ci unsigned num_prim_params = outinfo->prim_param_exports; 5799bf215546Sopenharmony_ci radeon_set_context_reg( 5800bf215546Sopenharmony_ci ctx_cs, R_0286C4_SPI_VS_OUT_CONFIG, 5801bf215546Sopenharmony_ci S_0286C4_VS_EXPORT_COUNT(num_params - 1) | 5802bf215546Sopenharmony_ci S_0286C4_PRIM_EXPORT_COUNT(num_prim_params) | 5803bf215546Sopenharmony_ci S_0286C4_NO_PC_EXPORT(no_pc_export)); 5804bf215546Sopenharmony_ci 5805bf215546Sopenharmony_ci unsigned idx_format = V_028708_SPI_SHADER_1COMP; 5806bf215546Sopenharmony_ci if (outinfo->writes_layer_per_primitive || 5807bf215546Sopenharmony_ci outinfo->writes_viewport_index_per_primitive || 5808bf215546Sopenharmony_ci outinfo->writes_primitive_shading_rate_per_primitive) 5809bf215546Sopenharmony_ci idx_format = V_028708_SPI_SHADER_2COMP; 5810bf215546Sopenharmony_ci 5811bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028708_SPI_SHADER_IDX_FORMAT, 5812bf215546Sopenharmony_ci S_028708_IDX0_EXPORT_FORMAT(idx_format)); 5813bf215546Sopenharmony_ci radeon_set_context_reg( 5814bf215546Sopenharmony_ci ctx_cs, R_02870C_SPI_SHADER_POS_FORMAT, 5815bf215546Sopenharmony_ci S_02870C_POS0_EXPORT_FORMAT(V_02870C_SPI_SHADER_4COMP) | 5816bf215546Sopenharmony_ci S_02870C_POS1_EXPORT_FORMAT(outinfo->pos_exports > 1 ? V_02870C_SPI_SHADER_4COMP 5817bf215546Sopenharmony_ci : V_02870C_SPI_SHADER_NONE) | 5818bf215546Sopenharmony_ci S_02870C_POS2_EXPORT_FORMAT(outinfo->pos_exports > 2 ? V_02870C_SPI_SHADER_4COMP 5819bf215546Sopenharmony_ci : V_02870C_SPI_SHADER_NONE) | 5820bf215546Sopenharmony_ci S_02870C_POS3_EXPORT_FORMAT(outinfo->pos_exports > 3 ? 
V_02870C_SPI_SHADER_4COMP 5821bf215546Sopenharmony_ci : V_02870C_SPI_SHADER_NONE)); 5822bf215546Sopenharmony_ci 5823bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_02881C_PA_CL_VS_OUT_CNTL, 5824bf215546Sopenharmony_ci S_02881C_USE_VTX_POINT_SIZE(outinfo->writes_pointsize) | 5825bf215546Sopenharmony_ci S_02881C_USE_VTX_RENDER_TARGET_INDX(outinfo->writes_layer) | 5826bf215546Sopenharmony_ci S_02881C_USE_VTX_VIEWPORT_INDX(outinfo->writes_viewport_index) | 5827bf215546Sopenharmony_ci S_02881C_USE_VTX_VRS_RATE(outinfo->writes_primitive_shading_rate) | 5828bf215546Sopenharmony_ci S_02881C_VS_OUT_MISC_VEC_ENA(misc_vec_ena) | 5829bf215546Sopenharmony_ci S_02881C_VS_OUT_MISC_SIDE_BUS_ENA(misc_vec_ena) | 5830bf215546Sopenharmony_ci S_02881C_VS_OUT_CCDIST0_VEC_ENA((total_mask & 0x0f) != 0) | 5831bf215546Sopenharmony_ci S_02881C_VS_OUT_CCDIST1_VEC_ENA((total_mask & 0xf0) != 0) | 5832bf215546Sopenharmony_ci total_mask << 8 | clip_dist_mask); 5833bf215546Sopenharmony_ci 5834bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028A84_VGT_PRIMITIVEID_EN, 5835bf215546Sopenharmony_ci S_028A84_PRIMITIVEID_EN(es_enable_prim_id) | 5836bf215546Sopenharmony_ci S_028A84_NGG_DISABLE_PROVOK_REUSE(outinfo->export_prim_id)); 5837bf215546Sopenharmony_ci 5838bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 5839bf215546Sopenharmony_ci ngg_state->vgt_esgs_ring_itemsize); 5840bf215546Sopenharmony_ci 5841bf215546Sopenharmony_ci /* NGG specific registers. */ 5842bf215546Sopenharmony_ci struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY]; 5843bf215546Sopenharmony_ci uint32_t gs_num_invocations = gs ? 
gs->info.gs.invocations : 1; 5844bf215546Sopenharmony_ci 5845bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level < GFX11) { 5846bf215546Sopenharmony_ci radeon_set_context_reg( 5847bf215546Sopenharmony_ci ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, 5848bf215546Sopenharmony_ci S_028A44_ES_VERTS_PER_SUBGRP(ngg_state->hw_max_esverts) | 5849bf215546Sopenharmony_ci S_028A44_GS_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) | 5850bf215546Sopenharmony_ci S_028A44_GS_INST_PRIMS_IN_SUBGRP(ngg_state->max_gsprims * gs_num_invocations)); 5851bf215546Sopenharmony_ci } 5852bf215546Sopenharmony_ci 5853bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_0287FC_GE_MAX_OUTPUT_PER_SUBGROUP, 5854bf215546Sopenharmony_ci S_0287FC_MAX_VERTS_PER_SUBGROUP(ngg_state->max_out_verts)); 5855bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028B4C_GE_NGG_SUBGRP_CNTL, 5856bf215546Sopenharmony_ci S_028B4C_PRIM_AMP_FACTOR(ngg_state->prim_amp_factor) | 5857bf215546Sopenharmony_ci S_028B4C_THDS_PER_SUBGRP(0)); /* for fast launch */ 5858bf215546Sopenharmony_ci radeon_set_context_reg( 5859bf215546Sopenharmony_ci ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, 5860bf215546Sopenharmony_ci S_028B90_CNT(gs_num_invocations) | S_028B90_ENABLE(gs_num_invocations > 1) | 5861bf215546Sopenharmony_ci S_028B90_EN_MAX_VERT_OUT_PER_GS_INSTANCE(ngg_state->max_vert_out_per_gs_instance)); 5862bf215546Sopenharmony_ci 5863bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX11) { 5864bf215546Sopenharmony_ci ge_cntl = S_03096C_PRIMS_PER_SUBGRP(ngg_state->max_gsprims) | 5865bf215546Sopenharmony_ci S_03096C_VERTS_PER_SUBGRP(ngg_state->enable_vertex_grouping 5866bf215546Sopenharmony_ci ? 
ngg_state->hw_max_esverts 5867bf215546Sopenharmony_ci : 256) | /* 256 = disable vertex grouping */ 5868bf215546Sopenharmony_ci S_03096C_BREAK_PRIMGRP_AT_EOI(break_wave_at_eoi) | 5869bf215546Sopenharmony_ci S_03096C_PRIM_GRP_SIZE_GFX11(256); 5870bf215546Sopenharmony_ci } else { 5871bf215546Sopenharmony_ci ge_cntl = S_03096C_PRIM_GRP_SIZE_GFX10(ngg_state->max_gsprims) | 5872bf215546Sopenharmony_ci S_03096C_VERT_GRP_SIZE(ngg_state->enable_vertex_grouping 5873bf215546Sopenharmony_ci ? ngg_state->hw_max_esverts 5874bf215546Sopenharmony_ci : 256) | /* 256 = disable vertex grouping */ 5875bf215546Sopenharmony_ci S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi); 5876bf215546Sopenharmony_ci } 5877bf215546Sopenharmony_ci 5878bf215546Sopenharmony_ci /* Bug workaround for a possible hang with non-tessellation cases. 5879bf215546Sopenharmony_ci * Tessellation always sets GE_CNTL.VERT_GRP_SIZE = 0 5880bf215546Sopenharmony_ci * 5881bf215546Sopenharmony_ci * Requirement: GE_CNTL.VERT_GRP_SIZE = VGT_GS_ONCHIP_CNTL.ES_VERTS_PER_SUBGRP - 5 5882bf215546Sopenharmony_ci */ 5883bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level == GFX10 && 5884bf215546Sopenharmony_ci !radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) && ngg_state->hw_max_esverts != 256) { 5885bf215546Sopenharmony_ci ge_cntl &= C_03096C_VERT_GRP_SIZE; 5886bf215546Sopenharmony_ci 5887bf215546Sopenharmony_ci if (ngg_state->hw_max_esverts > 5) { 5888bf215546Sopenharmony_ci ge_cntl |= S_03096C_VERT_GRP_SIZE(ngg_state->hw_max_esverts - 5); 5889bf215546Sopenharmony_ci } 5890bf215546Sopenharmony_ci } 5891bf215546Sopenharmony_ci 5892bf215546Sopenharmony_ci radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, ge_cntl); 5893bf215546Sopenharmony_ci 5894bf215546Sopenharmony_ci unsigned late_alloc_wave64, cu_mask; 5895bf215546Sopenharmony_ci ac_compute_late_alloc(&pdevice->rad_info, true, shader->info.has_ngg_culling, 5896bf215546Sopenharmony_ci shader->config.scratch_bytes_per_wave > 0, &late_alloc_wave64, &cu_mask); 
5897bf215546Sopenharmony_ci 5898bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX11) { 5899bf215546Sopenharmony_ci /* TODO: figure out how S_00B204_CU_EN_GFX11 interacts with ac_set_reg_cu_en */ 5900bf215546Sopenharmony_ci gfx10_set_sh_reg_idx3(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 5901bf215546Sopenharmony_ci S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F)); 5902bf215546Sopenharmony_ci gfx10_set_sh_reg_idx3( 5903bf215546Sopenharmony_ci cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 5904bf215546Sopenharmony_ci S_00B204_CU_EN_GFX11(0x1) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64)); 5905bf215546Sopenharmony_ci } else if (pdevice->rad_info.gfx_level >= GFX10) { 5906bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 5907bf215546Sopenharmony_ci S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F), 5908bf215546Sopenharmony_ci C_00B21C_CU_EN, 0, &pdevice->rad_info, (void*)gfx10_set_sh_reg_idx3); 5909bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 5910bf215546Sopenharmony_ci S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64), 5911bf215546Sopenharmony_ci C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info, 5912bf215546Sopenharmony_ci (void*)gfx10_set_sh_reg_idx3); 5913bf215546Sopenharmony_ci } else { 5914bf215546Sopenharmony_ci radeon_set_sh_reg_idx( 5915bf215546Sopenharmony_ci pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, 5916bf215546Sopenharmony_ci S_00B21C_CU_EN(cu_mask) | S_00B21C_WAVE_LIMIT(0x3F)); 5917bf215546Sopenharmony_ci radeon_set_sh_reg_idx( 5918bf215546Sopenharmony_ci pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, 5919bf215546Sopenharmony_ci S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(late_alloc_wave64)); 5920bf215546Sopenharmony_ci } 5921bf215546Sopenharmony_ci 5922bf215546Sopenharmony_ci uint32_t oversub_pc_lines = late_alloc_wave64 ? 
pdevice->rad_info.pc_lines / 4 : 0; 5923bf215546Sopenharmony_ci if (shader->info.has_ngg_culling) { 5924bf215546Sopenharmony_ci unsigned oversub_factor = 2; 5925bf215546Sopenharmony_ci 5926bf215546Sopenharmony_ci if (outinfo->param_exports > 4) 5927bf215546Sopenharmony_ci oversub_factor = 4; 5928bf215546Sopenharmony_ci else if (outinfo->param_exports > 2) 5929bf215546Sopenharmony_ci oversub_factor = 3; 5930bf215546Sopenharmony_ci 5931bf215546Sopenharmony_ci oversub_pc_lines *= oversub_factor; 5932bf215546Sopenharmony_ci } 5933bf215546Sopenharmony_ci 5934bf215546Sopenharmony_ci gfx10_emit_ge_pc_alloc(cs, pdevice->rad_info.gfx_level, oversub_pc_lines); 5935bf215546Sopenharmony_ci} 5936bf215546Sopenharmony_ci 5937bf215546Sopenharmony_cistatic void 5938bf215546Sopenharmony_ciradv_pipeline_emit_hw_hs(struct radeon_cmdbuf *cs, const struct radv_graphics_pipeline *pipeline, 5939bf215546Sopenharmony_ci const struct radv_shader *shader) 5940bf215546Sopenharmony_ci{ 5941bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5942bf215546Sopenharmony_ci uint64_t va = radv_shader_get_va(shader); 5943bf215546Sopenharmony_ci 5944bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX9) { 5945bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 5946bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B520_SPI_SHADER_PGM_LO_LS, va >> 8); 5947bf215546Sopenharmony_ci } else { 5948bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B410_SPI_SHADER_PGM_LO_LS, va >> 8); 5949bf215546Sopenharmony_ci } 5950bf215546Sopenharmony_ci 5951bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B428_SPI_SHADER_PGM_RSRC1_HS, 2); 5952bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc1); 5953bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc2); 5954bf215546Sopenharmony_ci } else { 5955bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B420_SPI_SHADER_PGM_LO_HS, 4); 5956bf215546Sopenharmony_ci radeon_emit(cs, 
va >> 8); 5957bf215546Sopenharmony_ci radeon_emit(cs, S_00B424_MEM_BASE(va >> 40)); 5958bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc1); 5959bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc2); 5960bf215546Sopenharmony_ci } 5961bf215546Sopenharmony_ci} 5962bf215546Sopenharmony_ci 5963bf215546Sopenharmony_cistatic void 5964bf215546Sopenharmony_ciradv_pipeline_emit_vertex_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 5965bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 5966bf215546Sopenharmony_ci{ 5967bf215546Sopenharmony_ci struct radv_shader *vs; 5968bf215546Sopenharmony_ci 5969bf215546Sopenharmony_ci /* Skip shaders merged into HS/GS */ 5970bf215546Sopenharmony_ci vs = pipeline->base.shaders[MESA_SHADER_VERTEX]; 5971bf215546Sopenharmony_ci if (!vs) 5972bf215546Sopenharmony_ci return; 5973bf215546Sopenharmony_ci 5974bf215546Sopenharmony_ci if (vs->info.vs.as_ls) 5975bf215546Sopenharmony_ci radv_pipeline_emit_hw_ls(cs, pipeline, vs); 5976bf215546Sopenharmony_ci else if (vs->info.vs.as_es) 5977bf215546Sopenharmony_ci radv_pipeline_emit_hw_es(cs, pipeline, vs); 5978bf215546Sopenharmony_ci else if (vs->info.is_ngg) 5979bf215546Sopenharmony_ci radv_pipeline_emit_hw_ngg(ctx_cs, cs, pipeline, vs); 5980bf215546Sopenharmony_ci else 5981bf215546Sopenharmony_ci radv_pipeline_emit_hw_vs(ctx_cs, cs, pipeline, vs); 5982bf215546Sopenharmony_ci} 5983bf215546Sopenharmony_ci 5984bf215546Sopenharmony_cistatic void 5985bf215546Sopenharmony_ciradv_pipeline_emit_tess_shaders(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 5986bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 5987bf215546Sopenharmony_ci{ 5988bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 5989bf215546Sopenharmony_ci struct radv_shader *tes, *tcs; 5990bf215546Sopenharmony_ci 5991bf215546Sopenharmony_ci tcs = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]; 
5992bf215546Sopenharmony_ci tes = pipeline->base.shaders[MESA_SHADER_TESS_EVAL]; 5993bf215546Sopenharmony_ci 5994bf215546Sopenharmony_ci if (tes) { 5995bf215546Sopenharmony_ci if (tes->info.is_ngg) { 5996bf215546Sopenharmony_ci radv_pipeline_emit_hw_ngg(ctx_cs, cs, pipeline, tes); 5997bf215546Sopenharmony_ci } else if (tes->info.tes.as_es) 5998bf215546Sopenharmony_ci radv_pipeline_emit_hw_es(cs, pipeline, tes); 5999bf215546Sopenharmony_ci else 6000bf215546Sopenharmony_ci radv_pipeline_emit_hw_vs(ctx_cs, cs, pipeline, tes); 6001bf215546Sopenharmony_ci } 6002bf215546Sopenharmony_ci 6003bf215546Sopenharmony_ci radv_pipeline_emit_hw_hs(cs, pipeline, tcs); 6004bf215546Sopenharmony_ci 6005bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10 && 6006bf215546Sopenharmony_ci !radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && !radv_pipeline_has_ngg(pipeline)) { 6007bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, 6008bf215546Sopenharmony_ci S_028A44_ES_VERTS_PER_SUBGRP(250) | S_028A44_GS_PRIMS_PER_SUBGRP(126) | 6009bf215546Sopenharmony_ci S_028A44_GS_INST_PRIMS_IN_SUBGRP(126)); 6010bf215546Sopenharmony_ci } 6011bf215546Sopenharmony_ci} 6012bf215546Sopenharmony_ci 6013bf215546Sopenharmony_cistatic void 6014bf215546Sopenharmony_ciradv_pipeline_emit_tess_state(struct radeon_cmdbuf *ctx_cs, 6015bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline, 6016bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 6017bf215546Sopenharmony_ci{ 6018bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6019bf215546Sopenharmony_ci struct radv_shader *tes = radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL); 6020bf215546Sopenharmony_ci unsigned type = 0, partitioning = 0, topology = 0, distribution_mode = 0; 6021bf215546Sopenharmony_ci unsigned num_tcs_input_cp, num_tcs_output_cp, num_patches; 6022bf215546Sopenharmony_ci unsigned ls_hs_config; 
6023bf215546Sopenharmony_ci 6024bf215546Sopenharmony_ci num_tcs_input_cp = info->ts.patch_control_points; 6025bf215546Sopenharmony_ci num_tcs_output_cp = 6026bf215546Sopenharmony_ci pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.tcs.tcs_vertices_out; // TCS VERTICES OUT 6027bf215546Sopenharmony_ci num_patches = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches; 6028bf215546Sopenharmony_ci 6029bf215546Sopenharmony_ci ls_hs_config = S_028B58_NUM_PATCHES(num_patches) | S_028B58_HS_NUM_INPUT_CP(num_tcs_input_cp) | 6030bf215546Sopenharmony_ci S_028B58_HS_NUM_OUTPUT_CP(num_tcs_output_cp); 6031bf215546Sopenharmony_ci 6032bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX7) { 6033bf215546Sopenharmony_ci radeon_set_context_reg_idx(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, 2, ls_hs_config); 6034bf215546Sopenharmony_ci } else { 6035bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028B58_VGT_LS_HS_CONFIG, ls_hs_config); 6036bf215546Sopenharmony_ci } 6037bf215546Sopenharmony_ci 6038bf215546Sopenharmony_ci switch (tes->info.tes._primitive_mode) { 6039bf215546Sopenharmony_ci case TESS_PRIMITIVE_TRIANGLES: 6040bf215546Sopenharmony_ci type = V_028B6C_TESS_TRIANGLE; 6041bf215546Sopenharmony_ci break; 6042bf215546Sopenharmony_ci case TESS_PRIMITIVE_QUADS: 6043bf215546Sopenharmony_ci type = V_028B6C_TESS_QUAD; 6044bf215546Sopenharmony_ci break; 6045bf215546Sopenharmony_ci case TESS_PRIMITIVE_ISOLINES: 6046bf215546Sopenharmony_ci type = V_028B6C_TESS_ISOLINE; 6047bf215546Sopenharmony_ci break; 6048bf215546Sopenharmony_ci default: 6049bf215546Sopenharmony_ci break; 6050bf215546Sopenharmony_ci } 6051bf215546Sopenharmony_ci 6052bf215546Sopenharmony_ci switch (tes->info.tes.spacing) { 6053bf215546Sopenharmony_ci case TESS_SPACING_EQUAL: 6054bf215546Sopenharmony_ci partitioning = V_028B6C_PART_INTEGER; 6055bf215546Sopenharmony_ci break; 6056bf215546Sopenharmony_ci case TESS_SPACING_FRACTIONAL_ODD: 6057bf215546Sopenharmony_ci partitioning = 
V_028B6C_PART_FRAC_ODD; 6058bf215546Sopenharmony_ci break; 6059bf215546Sopenharmony_ci case TESS_SPACING_FRACTIONAL_EVEN: 6060bf215546Sopenharmony_ci partitioning = V_028B6C_PART_FRAC_EVEN; 6061bf215546Sopenharmony_ci break; 6062bf215546Sopenharmony_ci default: 6063bf215546Sopenharmony_ci break; 6064bf215546Sopenharmony_ci } 6065bf215546Sopenharmony_ci 6066bf215546Sopenharmony_ci bool ccw = tes->info.tes.ccw; 6067bf215546Sopenharmony_ci if (info->ts.domain_origin != VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT) 6068bf215546Sopenharmony_ci ccw = !ccw; 6069bf215546Sopenharmony_ci 6070bf215546Sopenharmony_ci if (tes->info.tes.point_mode) 6071bf215546Sopenharmony_ci topology = V_028B6C_OUTPUT_POINT; 6072bf215546Sopenharmony_ci else if (tes->info.tes._primitive_mode == TESS_PRIMITIVE_ISOLINES) 6073bf215546Sopenharmony_ci topology = V_028B6C_OUTPUT_LINE; 6074bf215546Sopenharmony_ci else if (ccw) 6075bf215546Sopenharmony_ci topology = V_028B6C_OUTPUT_TRIANGLE_CCW; 6076bf215546Sopenharmony_ci else 6077bf215546Sopenharmony_ci topology = V_028B6C_OUTPUT_TRIANGLE_CW; 6078bf215546Sopenharmony_ci 6079bf215546Sopenharmony_ci if (pdevice->rad_info.has_distributed_tess) { 6080bf215546Sopenharmony_ci if (pdevice->rad_info.family == CHIP_FIJI || pdevice->rad_info.family >= CHIP_POLARIS10) 6081bf215546Sopenharmony_ci distribution_mode = V_028B6C_TRAPEZOIDS; 6082bf215546Sopenharmony_ci else 6083bf215546Sopenharmony_ci distribution_mode = V_028B6C_DONUTS; 6084bf215546Sopenharmony_ci } else 6085bf215546Sopenharmony_ci distribution_mode = V_028B6C_NO_DIST; 6086bf215546Sopenharmony_ci 6087bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028B6C_VGT_TF_PARAM, 6088bf215546Sopenharmony_ci S_028B6C_TYPE(type) | S_028B6C_PARTITIONING(partitioning) | 6089bf215546Sopenharmony_ci S_028B6C_TOPOLOGY(topology) | 6090bf215546Sopenharmony_ci S_028B6C_DISTRIBUTION_MODE(distribution_mode)); 6091bf215546Sopenharmony_ci} 6092bf215546Sopenharmony_ci 6093bf215546Sopenharmony_cistatic void 
6094bf215546Sopenharmony_ciradv_pipeline_emit_hw_gs(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 6095bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline, const struct radv_shader *gs) 6096bf215546Sopenharmony_ci{ 6097bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6098bf215546Sopenharmony_ci const struct gfx9_gs_info *gs_state = &gs->info.gs_ring_info; 6099bf215546Sopenharmony_ci unsigned gs_max_out_vertices; 6100bf215546Sopenharmony_ci const uint8_t *num_components; 6101bf215546Sopenharmony_ci uint8_t max_stream; 6102bf215546Sopenharmony_ci unsigned offset; 6103bf215546Sopenharmony_ci uint64_t va; 6104bf215546Sopenharmony_ci 6105bf215546Sopenharmony_ci gs_max_out_vertices = gs->info.gs.vertices_out; 6106bf215546Sopenharmony_ci max_stream = gs->info.gs.max_stream; 6107bf215546Sopenharmony_ci num_components = gs->info.gs.num_stream_output_components; 6108bf215546Sopenharmony_ci 6109bf215546Sopenharmony_ci offset = num_components[0] * gs_max_out_vertices; 6110bf215546Sopenharmony_ci 6111bf215546Sopenharmony_ci radeon_set_context_reg_seq(ctx_cs, R_028A60_VGT_GSVS_RING_OFFSET_1, 3); 6112bf215546Sopenharmony_ci radeon_emit(ctx_cs, offset); 6113bf215546Sopenharmony_ci if (max_stream >= 1) 6114bf215546Sopenharmony_ci offset += num_components[1] * gs_max_out_vertices; 6115bf215546Sopenharmony_ci radeon_emit(ctx_cs, offset); 6116bf215546Sopenharmony_ci if (max_stream >= 2) 6117bf215546Sopenharmony_ci offset += num_components[2] * gs_max_out_vertices; 6118bf215546Sopenharmony_ci radeon_emit(ctx_cs, offset); 6119bf215546Sopenharmony_ci if (max_stream >= 3) 6120bf215546Sopenharmony_ci offset += num_components[3] * gs_max_out_vertices; 6121bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028AB0_VGT_GSVS_RING_ITEMSIZE, offset); 6122bf215546Sopenharmony_ci 6123bf215546Sopenharmony_ci radeon_set_context_reg_seq(ctx_cs, R_028B5C_VGT_GS_VERT_ITEMSIZE, 4); 6124bf215546Sopenharmony_ci 
radeon_emit(ctx_cs, num_components[0]); 6125bf215546Sopenharmony_ci radeon_emit(ctx_cs, (max_stream >= 1) ? num_components[1] : 0); 6126bf215546Sopenharmony_ci radeon_emit(ctx_cs, (max_stream >= 2) ? num_components[2] : 0); 6127bf215546Sopenharmony_ci radeon_emit(ctx_cs, (max_stream >= 3) ? num_components[3] : 0); 6128bf215546Sopenharmony_ci 6129bf215546Sopenharmony_ci uint32_t gs_num_invocations = gs->info.gs.invocations; 6130bf215546Sopenharmony_ci radeon_set_context_reg( 6131bf215546Sopenharmony_ci ctx_cs, R_028B90_VGT_GS_INSTANCE_CNT, 6132bf215546Sopenharmony_ci S_028B90_CNT(MIN2(gs_num_invocations, 127)) | S_028B90_ENABLE(gs_num_invocations > 0)); 6133bf215546Sopenharmony_ci 6134bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028AAC_VGT_ESGS_RING_ITEMSIZE, 6135bf215546Sopenharmony_ci gs_state->vgt_esgs_ring_itemsize); 6136bf215546Sopenharmony_ci 6137bf215546Sopenharmony_ci va = radv_shader_get_va(gs); 6138bf215546Sopenharmony_ci 6139bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX9) { 6140bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 6141bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B320_SPI_SHADER_PGM_LO_ES, va >> 8); 6142bf215546Sopenharmony_ci } else { 6143bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B210_SPI_SHADER_PGM_LO_ES, va >> 8); 6144bf215546Sopenharmony_ci } 6145bf215546Sopenharmony_ci 6146bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B228_SPI_SHADER_PGM_RSRC1_GS, 2); 6147bf215546Sopenharmony_ci radeon_emit(cs, gs->config.rsrc1); 6148bf215546Sopenharmony_ci radeon_emit(cs, gs->config.rsrc2 | S_00B22C_LDS_SIZE(gs_state->lds_size)); 6149bf215546Sopenharmony_ci 6150bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028A44_VGT_GS_ONCHIP_CNTL, gs_state->vgt_gs_onchip_cntl); 6151bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP, 6152bf215546Sopenharmony_ci gs_state->vgt_gs_max_prims_per_subgroup); 6153bf215546Sopenharmony_ci } else { 
6154bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B220_SPI_SHADER_PGM_LO_GS, 4); 6155bf215546Sopenharmony_ci radeon_emit(cs, va >> 8); 6156bf215546Sopenharmony_ci radeon_emit(cs, S_00B224_MEM_BASE(va >> 40)); 6157bf215546Sopenharmony_ci radeon_emit(cs, gs->config.rsrc1); 6158bf215546Sopenharmony_ci radeon_emit(cs, gs->config.rsrc2); 6159bf215546Sopenharmony_ci } 6160bf215546Sopenharmony_ci 6161bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 6162bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 6163bf215546Sopenharmony_ci S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F), 6164bf215546Sopenharmony_ci C_00B21C_CU_EN, 0, &pdevice->rad_info, 6165bf215546Sopenharmony_ci (void*)gfx10_set_sh_reg_idx3); 6166bf215546Sopenharmony_ci ac_set_reg_cu_en(cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 6167bf215546Sopenharmony_ci S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0), 6168bf215546Sopenharmony_ci C_00B204_CU_EN_GFX10, 16, &pdevice->rad_info, 6169bf215546Sopenharmony_ci (void*)gfx10_set_sh_reg_idx3); 6170bf215546Sopenharmony_ci } else if (pdevice->rad_info.gfx_level >= GFX7) { 6171bf215546Sopenharmony_ci radeon_set_sh_reg_idx( 6172bf215546Sopenharmony_ci pdevice, cs, R_00B21C_SPI_SHADER_PGM_RSRC3_GS, 3, 6173bf215546Sopenharmony_ci S_00B21C_CU_EN(0xffff) | S_00B21C_WAVE_LIMIT(0x3F)); 6174bf215546Sopenharmony_ci 6175bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 6176bf215546Sopenharmony_ci radeon_set_sh_reg_idx( 6177bf215546Sopenharmony_ci pdevice, cs, R_00B204_SPI_SHADER_PGM_RSRC4_GS, 3, 6178bf215546Sopenharmony_ci S_00B204_CU_EN_GFX10(0xffff) | S_00B204_SPI_SHADER_LATE_ALLOC_GS_GFX10(0)); 6179bf215546Sopenharmony_ci } 6180bf215546Sopenharmony_ci } 6181bf215546Sopenharmony_ci 6182bf215546Sopenharmony_ci radv_pipeline_emit_hw_vs(ctx_cs, cs, pipeline, pipeline->base.gs_copy_shader); 6183bf215546Sopenharmony_ci} 6184bf215546Sopenharmony_ci 6185bf215546Sopenharmony_cistatic void 
6186bf215546Sopenharmony_ciradv_pipeline_emit_geometry_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 6187bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 6188bf215546Sopenharmony_ci{ 6189bf215546Sopenharmony_ci struct radv_shader *gs; 6190bf215546Sopenharmony_ci 6191bf215546Sopenharmony_ci gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY]; 6192bf215546Sopenharmony_ci if (!gs) 6193bf215546Sopenharmony_ci return; 6194bf215546Sopenharmony_ci 6195bf215546Sopenharmony_ci if (gs->info.is_ngg) 6196bf215546Sopenharmony_ci radv_pipeline_emit_hw_ngg(ctx_cs, cs, pipeline, gs); 6197bf215546Sopenharmony_ci else 6198bf215546Sopenharmony_ci radv_pipeline_emit_hw_gs(ctx_cs, cs, pipeline, gs); 6199bf215546Sopenharmony_ci 6200bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, gs->info.gs.vertices_out); 6201bf215546Sopenharmony_ci} 6202bf215546Sopenharmony_ci 6203bf215546Sopenharmony_cistatic void 6204bf215546Sopenharmony_ciradv_pipeline_emit_mesh_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 6205bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 6206bf215546Sopenharmony_ci{ 6207bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6208bf215546Sopenharmony_ci struct radv_shader *ms = pipeline->base.shaders[MESA_SHADER_MESH]; 6209bf215546Sopenharmony_ci if (!ms) 6210bf215546Sopenharmony_ci return; 6211bf215546Sopenharmony_ci 6212bf215546Sopenharmony_ci radv_pipeline_emit_hw_ngg(ctx_cs, cs, pipeline, ms); 6213bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028B38_VGT_GS_MAX_VERT_OUT, ms->info.workgroup_size); 6214bf215546Sopenharmony_ci radeon_set_uconfig_reg_idx(pdevice, ctx_cs, 6215bf215546Sopenharmony_ci R_030908_VGT_PRIMITIVE_TYPE, 1, V_008958_DI_PT_POINTLIST); 6216bf215546Sopenharmony_ci} 6217bf215546Sopenharmony_ci 6218bf215546Sopenharmony_cistatic uint32_t 
6219bf215546Sopenharmony_cioffset_to_ps_input(uint32_t offset, bool flat_shade, bool explicit, bool float16) 6220bf215546Sopenharmony_ci{ 6221bf215546Sopenharmony_ci uint32_t ps_input_cntl; 6222bf215546Sopenharmony_ci if (offset <= AC_EXP_PARAM_OFFSET_31) { 6223bf215546Sopenharmony_ci ps_input_cntl = S_028644_OFFSET(offset); 6224bf215546Sopenharmony_ci if (flat_shade || explicit) 6225bf215546Sopenharmony_ci ps_input_cntl |= S_028644_FLAT_SHADE(1); 6226bf215546Sopenharmony_ci if (explicit) { 6227bf215546Sopenharmony_ci /* Force parameter cache to be read in passthrough 6228bf215546Sopenharmony_ci * mode. 6229bf215546Sopenharmony_ci */ 6230bf215546Sopenharmony_ci ps_input_cntl |= S_028644_OFFSET(1 << 5); 6231bf215546Sopenharmony_ci } 6232bf215546Sopenharmony_ci if (float16) { 6233bf215546Sopenharmony_ci ps_input_cntl |= S_028644_FP16_INTERP_MODE(1) | S_028644_ATTR0_VALID(1); 6234bf215546Sopenharmony_ci } 6235bf215546Sopenharmony_ci } else { 6236bf215546Sopenharmony_ci /* The input is a DEFAULT_VAL constant. 
*/ 6237bf215546Sopenharmony_ci assert(offset >= AC_EXP_PARAM_DEFAULT_VAL_0000 && offset <= AC_EXP_PARAM_DEFAULT_VAL_1111); 6238bf215546Sopenharmony_ci offset -= AC_EXP_PARAM_DEFAULT_VAL_0000; 6239bf215546Sopenharmony_ci ps_input_cntl = S_028644_OFFSET(0x20) | S_028644_DEFAULT_VAL(offset); 6240bf215546Sopenharmony_ci } 6241bf215546Sopenharmony_ci return ps_input_cntl; 6242bf215546Sopenharmony_ci} 6243bf215546Sopenharmony_ci 6244bf215546Sopenharmony_cistatic void 6245bf215546Sopenharmony_cisingle_slot_to_ps_input(const struct radv_vs_output_info *outinfo, 6246bf215546Sopenharmony_ci unsigned slot, uint32_t *ps_input_cntl, unsigned *ps_offset, 6247bf215546Sopenharmony_ci bool skip_undef, bool use_default_0, bool flat_shade) 6248bf215546Sopenharmony_ci{ 6249bf215546Sopenharmony_ci unsigned vs_offset = outinfo->vs_output_param_offset[slot]; 6250bf215546Sopenharmony_ci 6251bf215546Sopenharmony_ci if (vs_offset == AC_EXP_PARAM_UNDEFINED) { 6252bf215546Sopenharmony_ci if (skip_undef) 6253bf215546Sopenharmony_ci return; 6254bf215546Sopenharmony_ci else if (use_default_0) 6255bf215546Sopenharmony_ci vs_offset = AC_EXP_PARAM_DEFAULT_VAL_0000; 6256bf215546Sopenharmony_ci else 6257bf215546Sopenharmony_ci unreachable("vs_offset should not be AC_EXP_PARAM_UNDEFINED."); 6258bf215546Sopenharmony_ci } 6259bf215546Sopenharmony_ci 6260bf215546Sopenharmony_ci ps_input_cntl[*ps_offset] = offset_to_ps_input(vs_offset, flat_shade, false, false); 6261bf215546Sopenharmony_ci ++(*ps_offset); 6262bf215546Sopenharmony_ci} 6263bf215546Sopenharmony_ci 6264bf215546Sopenharmony_cistatic void 6265bf215546Sopenharmony_ciinput_mask_to_ps_inputs(const struct radv_vs_output_info *outinfo, const struct radv_shader *ps, 6266bf215546Sopenharmony_ci uint32_t input_mask, uint32_t *ps_input_cntl, unsigned *ps_offset) 6267bf215546Sopenharmony_ci{ 6268bf215546Sopenharmony_ci u_foreach_bit(i, input_mask) { 6269bf215546Sopenharmony_ci unsigned vs_offset = outinfo->vs_output_param_offset[VARYING_SLOT_VAR0 + i]; 
6270bf215546Sopenharmony_ci if (vs_offset == AC_EXP_PARAM_UNDEFINED) { 6271bf215546Sopenharmony_ci ps_input_cntl[*ps_offset] = S_028644_OFFSET(0x20); 6272bf215546Sopenharmony_ci ++(*ps_offset); 6273bf215546Sopenharmony_ci continue; 6274bf215546Sopenharmony_ci } 6275bf215546Sopenharmony_ci 6276bf215546Sopenharmony_ci bool flat_shade = !!(ps->info.ps.flat_shaded_mask & (1u << *ps_offset)); 6277bf215546Sopenharmony_ci bool explicit = !!(ps->info.ps.explicit_shaded_mask & (1u << *ps_offset)); 6278bf215546Sopenharmony_ci bool float16 = !!(ps->info.ps.float16_shaded_mask & (1u << *ps_offset)); 6279bf215546Sopenharmony_ci 6280bf215546Sopenharmony_ci ps_input_cntl[*ps_offset] = offset_to_ps_input(vs_offset, flat_shade, explicit, float16); 6281bf215546Sopenharmony_ci ++(*ps_offset); 6282bf215546Sopenharmony_ci } 6283bf215546Sopenharmony_ci} 6284bf215546Sopenharmony_ci 6285bf215546Sopenharmony_cistatic void 6286bf215546Sopenharmony_ciradv_pipeline_emit_ps_inputs(struct radeon_cmdbuf *ctx_cs, 6287bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 6288bf215546Sopenharmony_ci{ 6289bf215546Sopenharmony_ci struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; 6290bf215546Sopenharmony_ci const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); 6291bf215546Sopenharmony_ci bool mesh = radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH); 6292bf215546Sopenharmony_ci uint32_t ps_input_cntl[32]; 6293bf215546Sopenharmony_ci 6294bf215546Sopenharmony_ci unsigned ps_offset = 0; 6295bf215546Sopenharmony_ci 6296bf215546Sopenharmony_ci if (ps->info.ps.prim_id_input && !mesh) 6297bf215546Sopenharmony_ci single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, 6298bf215546Sopenharmony_ci true, false, true); 6299bf215546Sopenharmony_ci 6300bf215546Sopenharmony_ci if (ps->info.ps.layer_input && !mesh) 6301bf215546Sopenharmony_ci single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, 
6302bf215546Sopenharmony_ci false, true, true); 6303bf215546Sopenharmony_ci 6304bf215546Sopenharmony_ci if (ps->info.ps.viewport_index_input && !mesh) 6305bf215546Sopenharmony_ci single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, 6306bf215546Sopenharmony_ci false, false, true); 6307bf215546Sopenharmony_ci 6308bf215546Sopenharmony_ci if (ps->info.ps.has_pcoord) 6309bf215546Sopenharmony_ci ps_input_cntl[ps_offset++] = S_028644_PT_SPRITE_TEX(1) | S_028644_OFFSET(0x20); 6310bf215546Sopenharmony_ci 6311bf215546Sopenharmony_ci if (ps->info.ps.num_input_clips_culls) { 6312bf215546Sopenharmony_ci single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST0, ps_input_cntl, &ps_offset, 6313bf215546Sopenharmony_ci true, false, false); 6314bf215546Sopenharmony_ci 6315bf215546Sopenharmony_ci if (ps->info.ps.num_input_clips_culls > 4) 6316bf215546Sopenharmony_ci single_slot_to_ps_input(outinfo, VARYING_SLOT_CLIP_DIST1, ps_input_cntl, &ps_offset, 6317bf215546Sopenharmony_ci true, false, false); 6318bf215546Sopenharmony_ci } 6319bf215546Sopenharmony_ci 6320bf215546Sopenharmony_ci input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_mask, 6321bf215546Sopenharmony_ci ps_input_cntl, &ps_offset); 6322bf215546Sopenharmony_ci 6323bf215546Sopenharmony_ci /* Per-primitive PS inputs: the HW needs these to be last. 
*/ 6324bf215546Sopenharmony_ci 6325bf215546Sopenharmony_ci if (ps->info.ps.prim_id_input && mesh) 6326bf215546Sopenharmony_ci single_slot_to_ps_input(outinfo, VARYING_SLOT_PRIMITIVE_ID, ps_input_cntl, &ps_offset, 6327bf215546Sopenharmony_ci true, false, false); 6328bf215546Sopenharmony_ci 6329bf215546Sopenharmony_ci if (ps->info.ps.layer_input && mesh) 6330bf215546Sopenharmony_ci single_slot_to_ps_input(outinfo, VARYING_SLOT_LAYER, ps_input_cntl, &ps_offset, 6331bf215546Sopenharmony_ci false, true, false); 6332bf215546Sopenharmony_ci 6333bf215546Sopenharmony_ci if (ps->info.ps.viewport_index_input && mesh) 6334bf215546Sopenharmony_ci single_slot_to_ps_input(outinfo, VARYING_SLOT_VIEWPORT, ps_input_cntl, &ps_offset, 6335bf215546Sopenharmony_ci false, false, false); 6336bf215546Sopenharmony_ci 6337bf215546Sopenharmony_ci input_mask_to_ps_inputs(outinfo, ps, ps->info.ps.input_per_primitive_mask, 6338bf215546Sopenharmony_ci ps_input_cntl, &ps_offset); 6339bf215546Sopenharmony_ci 6340bf215546Sopenharmony_ci if (ps_offset) { 6341bf215546Sopenharmony_ci radeon_set_context_reg_seq(ctx_cs, R_028644_SPI_PS_INPUT_CNTL_0, ps_offset); 6342bf215546Sopenharmony_ci for (unsigned i = 0; i < ps_offset; i++) { 6343bf215546Sopenharmony_ci radeon_emit(ctx_cs, ps_input_cntl[i]); 6344bf215546Sopenharmony_ci } 6345bf215546Sopenharmony_ci } 6346bf215546Sopenharmony_ci} 6347bf215546Sopenharmony_ci 6348bf215546Sopenharmony_cistatic uint32_t 6349bf215546Sopenharmony_ciradv_compute_db_shader_control(const struct radv_physical_device *pdevice, 6350bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline, 6351bf215546Sopenharmony_ci const struct radv_shader *ps) 6352bf215546Sopenharmony_ci{ 6353bf215546Sopenharmony_ci unsigned conservative_z_export = V_02880C_EXPORT_ANY_Z; 6354bf215546Sopenharmony_ci unsigned z_order; 6355bf215546Sopenharmony_ci if (ps->info.ps.early_fragment_test || !ps->info.ps.writes_memory) 6356bf215546Sopenharmony_ci z_order = V_02880C_EARLY_Z_THEN_LATE_Z; 
6357bf215546Sopenharmony_ci else 6358bf215546Sopenharmony_ci z_order = V_02880C_LATE_Z; 6359bf215546Sopenharmony_ci 6360bf215546Sopenharmony_ci if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_GREATER) 6361bf215546Sopenharmony_ci conservative_z_export = V_02880C_EXPORT_GREATER_THAN_Z; 6362bf215546Sopenharmony_ci else if (ps->info.ps.depth_layout == FRAG_DEPTH_LAYOUT_LESS) 6363bf215546Sopenharmony_ci conservative_z_export = V_02880C_EXPORT_LESS_THAN_Z; 6364bf215546Sopenharmony_ci 6365bf215546Sopenharmony_ci bool disable_rbplus = pdevice->rad_info.has_rbplus && !pdevice->rad_info.rbplus_allowed; 6366bf215546Sopenharmony_ci 6367bf215546Sopenharmony_ci /* It shouldn't be needed to export gl_SampleMask when MSAA is disabled 6368bf215546Sopenharmony_ci * but this appears to break Project Cars (DXVK). See 6369bf215546Sopenharmony_ci * https://bugs.freedesktop.org/show_bug.cgi?id=109401 6370bf215546Sopenharmony_ci */ 6371bf215546Sopenharmony_ci bool mask_export_enable = ps->info.ps.writes_sample_mask; 6372bf215546Sopenharmony_ci 6373bf215546Sopenharmony_ci return S_02880C_Z_EXPORT_ENABLE(ps->info.ps.writes_z) | 6374bf215546Sopenharmony_ci S_02880C_STENCIL_TEST_VAL_EXPORT_ENABLE(ps->info.ps.writes_stencil) | 6375bf215546Sopenharmony_ci S_02880C_KILL_ENABLE(!!ps->info.ps.can_discard) | 6376bf215546Sopenharmony_ci S_02880C_MASK_EXPORT_ENABLE(mask_export_enable) | 6377bf215546Sopenharmony_ci S_02880C_CONSERVATIVE_Z_EXPORT(conservative_z_export) | S_02880C_Z_ORDER(z_order) | 6378bf215546Sopenharmony_ci S_02880C_DEPTH_BEFORE_SHADER(ps->info.ps.early_fragment_test) | 6379bf215546Sopenharmony_ci S_02880C_PRE_SHADER_DEPTH_COVERAGE_ENABLE(ps->info.ps.post_depth_coverage) | 6380bf215546Sopenharmony_ci S_02880C_EXEC_ON_HIER_FAIL(ps->info.ps.writes_memory) | 6381bf215546Sopenharmony_ci S_02880C_EXEC_ON_NOOP(ps->info.ps.writes_memory) | 6382bf215546Sopenharmony_ci S_02880C_DUAL_QUAD_DISABLE(disable_rbplus); 6383bf215546Sopenharmony_ci} 6384bf215546Sopenharmony_ci 
6385bf215546Sopenharmony_cistatic void 6386bf215546Sopenharmony_ciradv_pipeline_emit_fragment_shader(struct radeon_cmdbuf *ctx_cs, struct radeon_cmdbuf *cs, 6387bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 6388bf215546Sopenharmony_ci{ 6389bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6390bf215546Sopenharmony_ci struct radv_shader *ps; 6391bf215546Sopenharmony_ci bool param_gen; 6392bf215546Sopenharmony_ci uint64_t va; 6393bf215546Sopenharmony_ci assert(pipeline->base.shaders[MESA_SHADER_FRAGMENT]); 6394bf215546Sopenharmony_ci 6395bf215546Sopenharmony_ci ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; 6396bf215546Sopenharmony_ci va = radv_shader_get_va(ps); 6397bf215546Sopenharmony_ci 6398bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B020_SPI_SHADER_PGM_LO_PS, 4); 6399bf215546Sopenharmony_ci radeon_emit(cs, va >> 8); 6400bf215546Sopenharmony_ci radeon_emit(cs, S_00B024_MEM_BASE(va >> 40)); 6401bf215546Sopenharmony_ci radeon_emit(cs, ps->config.rsrc1); 6402bf215546Sopenharmony_ci radeon_emit(cs, ps->config.rsrc2); 6403bf215546Sopenharmony_ci 6404bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_02880C_DB_SHADER_CONTROL, 6405bf215546Sopenharmony_ci radv_compute_db_shader_control(pdevice, pipeline, ps)); 6406bf215546Sopenharmony_ci 6407bf215546Sopenharmony_ci radeon_set_context_reg_seq(ctx_cs, R_0286CC_SPI_PS_INPUT_ENA, 2); 6408bf215546Sopenharmony_ci radeon_emit(ctx_cs, ps->config.spi_ps_input_ena); 6409bf215546Sopenharmony_ci radeon_emit(ctx_cs, ps->config.spi_ps_input_addr); 6410bf215546Sopenharmony_ci 6411bf215546Sopenharmony_ci /* Workaround when there are no PS inputs but LDS is used. 
*/ 6412bf215546Sopenharmony_ci param_gen = pdevice->rad_info.gfx_level >= GFX11 && 6413bf215546Sopenharmony_ci !ps->info.ps.num_interp && ps->config.lds_size; 6414bf215546Sopenharmony_ci 6415bf215546Sopenharmony_ci radeon_set_context_reg( 6416bf215546Sopenharmony_ci ctx_cs, R_0286D8_SPI_PS_IN_CONTROL, 6417bf215546Sopenharmony_ci S_0286D8_NUM_INTERP(ps->info.ps.num_interp) | 6418bf215546Sopenharmony_ci S_0286D8_NUM_PRIM_INTERP(ps->info.ps.num_prim_interp) | 6419bf215546Sopenharmony_ci S_0286D8_PS_W32_EN(ps->info.wave_size == 32) | 6420bf215546Sopenharmony_ci S_0286D8_PARAM_GEN(param_gen)); 6421bf215546Sopenharmony_ci 6422bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_0286E0_SPI_BARYC_CNTL, pipeline->spi_baryc_cntl); 6423bf215546Sopenharmony_ci 6424bf215546Sopenharmony_ci radeon_set_context_reg( 6425bf215546Sopenharmony_ci ctx_cs, R_028710_SPI_SHADER_Z_FORMAT, 6426bf215546Sopenharmony_ci ac_get_spi_shader_z_format(ps->info.ps.writes_z, ps->info.ps.writes_stencil, 6427bf215546Sopenharmony_ci ps->info.ps.writes_sample_mask, false)); 6428bf215546Sopenharmony_ci} 6429bf215546Sopenharmony_ci 6430bf215546Sopenharmony_cistatic void 6431bf215546Sopenharmony_ciradv_pipeline_emit_vgt_vertex_reuse(struct radeon_cmdbuf *ctx_cs, 6432bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 6433bf215546Sopenharmony_ci{ 6434bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6435bf215546Sopenharmony_ci 6436bf215546Sopenharmony_ci if (pdevice->rad_info.family < CHIP_POLARIS10 || pdevice->rad_info.gfx_level >= GFX10) 6437bf215546Sopenharmony_ci return; 6438bf215546Sopenharmony_ci 6439bf215546Sopenharmony_ci unsigned vtx_reuse_depth = 30; 6440bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL) && 6441bf215546Sopenharmony_ci radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL)->info.tes.spacing == 6442bf215546Sopenharmony_ci TESS_SPACING_FRACTIONAL_ODD) { 
6443bf215546Sopenharmony_ci vtx_reuse_depth = 14; 6444bf215546Sopenharmony_ci } 6445bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 6446bf215546Sopenharmony_ci S_028C58_VTX_REUSE_DEPTH(vtx_reuse_depth)); 6447bf215546Sopenharmony_ci} 6448bf215546Sopenharmony_ci 6449bf215546Sopenharmony_cistatic void 6450bf215546Sopenharmony_ciradv_pipeline_emit_vgt_shader_config(struct radeon_cmdbuf *ctx_cs, 6451bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 6452bf215546Sopenharmony_ci{ 6453bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6454bf215546Sopenharmony_ci uint32_t stages = 0; 6455bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { 6456bf215546Sopenharmony_ci stages |= S_028B54_LS_EN(V_028B54_LS_STAGE_ON) | S_028B54_HS_EN(1) | S_028B54_DYNAMIC_HS(1); 6457bf215546Sopenharmony_ci 6458bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) 6459bf215546Sopenharmony_ci stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS) | S_028B54_GS_EN(1); 6460bf215546Sopenharmony_ci else if (radv_pipeline_has_ngg(pipeline)) 6461bf215546Sopenharmony_ci stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_DS); 6462bf215546Sopenharmony_ci else 6463bf215546Sopenharmony_ci stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_DS); 6464bf215546Sopenharmony_ci } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { 6465bf215546Sopenharmony_ci stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL) | S_028B54_GS_EN(1); 6466bf215546Sopenharmony_ci } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) { 6467bf215546Sopenharmony_ci assert(!radv_pipeline_has_ngg_passthrough(pipeline)); 6468bf215546Sopenharmony_ci stages |= S_028B54_GS_EN(1) | S_028B54_GS_FAST_LAUNCH(1); 6469bf215546Sopenharmony_ci 6470bf215546Sopenharmony_ci if (pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring) 
6471bf215546Sopenharmony_ci stages |= S_028B54_NGG_WAVE_ID_EN(1); 6472bf215546Sopenharmony_ci } else if (radv_pipeline_has_ngg(pipeline)) { 6473bf215546Sopenharmony_ci stages |= S_028B54_ES_EN(V_028B54_ES_STAGE_REAL); 6474bf215546Sopenharmony_ci } 6475bf215546Sopenharmony_ci 6476bf215546Sopenharmony_ci if (radv_pipeline_has_ngg(pipeline)) { 6477bf215546Sopenharmony_ci stages |= S_028B54_PRIMGEN_EN(1); 6478bf215546Sopenharmony_ci if (pipeline->streamout_shader) 6479bf215546Sopenharmony_ci stages |= S_028B54_NGG_WAVE_ID_EN(1); 6480bf215546Sopenharmony_ci if (radv_pipeline_has_ngg_passthrough(pipeline)) { 6481bf215546Sopenharmony_ci stages |= S_028B54_PRIMGEN_PASSTHRU_EN(1); 6482bf215546Sopenharmony_ci if (pdevice->rad_info.family >= CHIP_NAVI23) 6483bf215546Sopenharmony_ci stages |= S_028B54_PRIMGEN_PASSTHRU_NO_MSG(1); 6484bf215546Sopenharmony_ci } 6485bf215546Sopenharmony_ci } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { 6486bf215546Sopenharmony_ci stages |= S_028B54_VS_EN(V_028B54_VS_STAGE_COPY_SHADER); 6487bf215546Sopenharmony_ci } 6488bf215546Sopenharmony_ci 6489bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX9) 6490bf215546Sopenharmony_ci stages |= S_028B54_MAX_PRIMGRP_IN_WAVE(2); 6491bf215546Sopenharmony_ci 6492bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 6493bf215546Sopenharmony_ci uint8_t hs_size = 64, gs_size = 64, vs_size = 64; 6494bf215546Sopenharmony_ci 6495bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) 6496bf215546Sopenharmony_ci hs_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.wave_size; 6497bf215546Sopenharmony_ci 6498bf215546Sopenharmony_ci if (pipeline->base.shaders[MESA_SHADER_GEOMETRY]) { 6499bf215546Sopenharmony_ci vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.wave_size; 6500bf215546Sopenharmony_ci if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) 6501bf215546Sopenharmony_ci vs_size = 
pipeline->base.gs_copy_shader->info.wave_size; 6502bf215546Sopenharmony_ci } else if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]) 6503bf215546Sopenharmony_ci vs_size = pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.wave_size; 6504bf215546Sopenharmony_ci else if (pipeline->base.shaders[MESA_SHADER_VERTEX]) 6505bf215546Sopenharmony_ci vs_size = pipeline->base.shaders[MESA_SHADER_VERTEX]->info.wave_size; 6506bf215546Sopenharmony_ci else if (pipeline->base.shaders[MESA_SHADER_MESH]) 6507bf215546Sopenharmony_ci vs_size = gs_size = pipeline->base.shaders[MESA_SHADER_MESH]->info.wave_size; 6508bf215546Sopenharmony_ci 6509bf215546Sopenharmony_ci if (radv_pipeline_has_ngg(pipeline)) { 6510bf215546Sopenharmony_ci assert(!radv_pipeline_has_gs_copy_shader(&pipeline->base)); 6511bf215546Sopenharmony_ci gs_size = vs_size; 6512bf215546Sopenharmony_ci } 6513bf215546Sopenharmony_ci 6514bf215546Sopenharmony_ci /* legacy GS only supports Wave64 */ 6515bf215546Sopenharmony_ci stages |= S_028B54_HS_W32_EN(hs_size == 32 ? 1 : 0) | 6516bf215546Sopenharmony_ci S_028B54_GS_W32_EN(gs_size == 32 ? 1 : 0) | 6517bf215546Sopenharmony_ci S_028B54_VS_W32_EN(vs_size == 32 ? 
1 : 0); 6518bf215546Sopenharmony_ci } 6519bf215546Sopenharmony_ci 6520bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028B54_VGT_SHADER_STAGES_EN, stages); 6521bf215546Sopenharmony_ci} 6522bf215546Sopenharmony_ci 6523bf215546Sopenharmony_cistatic void 6524bf215546Sopenharmony_ciradv_pipeline_emit_cliprect_rule(struct radeon_cmdbuf *ctx_cs, 6525bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 6526bf215546Sopenharmony_ci{ 6527bf215546Sopenharmony_ci uint32_t cliprect_rule = 0; 6528bf215546Sopenharmony_ci 6529bf215546Sopenharmony_ci if (!info->dr.count) { 6530bf215546Sopenharmony_ci cliprect_rule = 0xffff; 6531bf215546Sopenharmony_ci } else { 6532bf215546Sopenharmony_ci for (unsigned i = 0; i < (1u << MAX_DISCARD_RECTANGLES); ++i) { 6533bf215546Sopenharmony_ci /* Interpret i as a bitmask, and then set the bit in 6534bf215546Sopenharmony_ci * the mask if that combination of rectangles in which 6535bf215546Sopenharmony_ci * the pixel is contained should pass the cliprect 6536bf215546Sopenharmony_ci * test. 
6537bf215546Sopenharmony_ci */ 6538bf215546Sopenharmony_ci unsigned relevant_subset = i & ((1u << info->dr.count) - 1); 6539bf215546Sopenharmony_ci 6540bf215546Sopenharmony_ci if (info->dr.mode == VK_DISCARD_RECTANGLE_MODE_INCLUSIVE_EXT && !relevant_subset) 6541bf215546Sopenharmony_ci continue; 6542bf215546Sopenharmony_ci 6543bf215546Sopenharmony_ci if (info->dr.mode == VK_DISCARD_RECTANGLE_MODE_EXCLUSIVE_EXT && relevant_subset) 6544bf215546Sopenharmony_ci continue; 6545bf215546Sopenharmony_ci 6546bf215546Sopenharmony_ci cliprect_rule |= 1u << i; 6547bf215546Sopenharmony_ci } 6548bf215546Sopenharmony_ci } 6549bf215546Sopenharmony_ci 6550bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_02820C_PA_SC_CLIPRECT_RULE, cliprect_rule); 6551bf215546Sopenharmony_ci} 6552bf215546Sopenharmony_ci 6553bf215546Sopenharmony_cistatic void 6554bf215546Sopenharmony_cigfx10_pipeline_emit_ge_cntl(struct radeon_cmdbuf *ctx_cs, 6555bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline) 6556bf215546Sopenharmony_ci{ 6557bf215546Sopenharmony_ci bool break_wave_at_eoi = false; 6558bf215546Sopenharmony_ci unsigned primgroup_size; 6559bf215546Sopenharmony_ci unsigned vertgroup_size = 256; /* 256 = disable vertex grouping */ 6560bf215546Sopenharmony_ci 6561bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { 6562bf215546Sopenharmony_ci primgroup_size = pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.num_tess_patches; 6563bf215546Sopenharmony_ci } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { 6564bf215546Sopenharmony_ci const struct gfx9_gs_info *gs_state = 6565bf215546Sopenharmony_ci &pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs_ring_info; 6566bf215546Sopenharmony_ci unsigned vgt_gs_onchip_cntl = gs_state->vgt_gs_onchip_cntl; 6567bf215546Sopenharmony_ci primgroup_size = G_028A44_GS_PRIMS_PER_SUBGRP(vgt_gs_onchip_cntl); 6568bf215546Sopenharmony_ci } else { 6569bf215546Sopenharmony_ci primgroup_size = 
128; /* recommended without a GS and tess */ 6570bf215546Sopenharmony_ci } 6571bf215546Sopenharmony_ci 6572bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { 6573bf215546Sopenharmony_ci if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]->info.uses_prim_id || 6574bf215546Sopenharmony_ci radv_get_shader(&pipeline->base, MESA_SHADER_TESS_EVAL)->info.uses_prim_id) 6575bf215546Sopenharmony_ci break_wave_at_eoi = true; 6576bf215546Sopenharmony_ci } 6577bf215546Sopenharmony_ci 6578bf215546Sopenharmony_ci radeon_set_uconfig_reg(ctx_cs, R_03096C_GE_CNTL, 6579bf215546Sopenharmony_ci S_03096C_PRIM_GRP_SIZE_GFX10(primgroup_size) | 6580bf215546Sopenharmony_ci S_03096C_VERT_GRP_SIZE(vertgroup_size) | 6581bf215546Sopenharmony_ci S_03096C_PACKET_TO_ONE_PA(0) /* line stipple */ | 6582bf215546Sopenharmony_ci S_03096C_BREAK_WAVE_AT_EOI(break_wave_at_eoi)); 6583bf215546Sopenharmony_ci} 6584bf215546Sopenharmony_ci 6585bf215546Sopenharmony_cistatic void 6586bf215546Sopenharmony_ciradv_pipeline_emit_vgt_gs_out(struct radeon_cmdbuf *ctx_cs, 6587bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline, 6588bf215546Sopenharmony_ci uint32_t vgt_gs_out_prim_type) 6589bf215546Sopenharmony_ci{ 6590bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6591bf215546Sopenharmony_ci 6592bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX11) { 6593bf215546Sopenharmony_ci radeon_set_uconfig_reg(ctx_cs, R_030998_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type); 6594bf215546Sopenharmony_ci } else { 6595bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028A6C_VGT_GS_OUT_PRIM_TYPE, vgt_gs_out_prim_type); 6596bf215546Sopenharmony_ci } 6597bf215546Sopenharmony_ci} 6598bf215546Sopenharmony_ci 6599bf215546Sopenharmony_cistatic void 6600bf215546Sopenharmony_cigfx103_pipeline_emit_vgt_draw_payload_cntl(struct radeon_cmdbuf *ctx_cs, 6601bf215546Sopenharmony_ci const struct radv_graphics_pipeline 
*pipeline, 6602bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 6603bf215546Sopenharmony_ci{ 6604bf215546Sopenharmony_ci const struct radv_vs_output_info *outinfo = get_vs_output_info(pipeline); 6605bf215546Sopenharmony_ci 6606bf215546Sopenharmony_ci bool enable_vrs = radv_is_vrs_enabled(pipeline, info); 6607bf215546Sopenharmony_ci 6608bf215546Sopenharmony_ci /* Enables the second channel of the primitive export instruction. 6609bf215546Sopenharmony_ci * This channel contains: VRS rate x, y, viewport and layer. 6610bf215546Sopenharmony_ci */ 6611bf215546Sopenharmony_ci bool enable_prim_payload = 6612bf215546Sopenharmony_ci outinfo && 6613bf215546Sopenharmony_ci (outinfo->writes_viewport_index_per_primitive || 6614bf215546Sopenharmony_ci outinfo->writes_layer_per_primitive || 6615bf215546Sopenharmony_ci outinfo->writes_primitive_shading_rate_per_primitive); 6616bf215546Sopenharmony_ci 6617bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 6618bf215546Sopenharmony_ci S_028A98_EN_VRS_RATE(enable_vrs) | 6619bf215546Sopenharmony_ci S_028A98_EN_PRIM_PAYLOAD(enable_prim_payload)); 6620bf215546Sopenharmony_ci} 6621bf215546Sopenharmony_ci 6622bf215546Sopenharmony_cistatic bool 6623bf215546Sopenharmony_cigfx103_pipeline_vrs_coarse_shading(const struct radv_graphics_pipeline *pipeline) 6624bf215546Sopenharmony_ci{ 6625bf215546Sopenharmony_ci struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; 6626bf215546Sopenharmony_ci struct radv_device *device = pipeline->base.device; 6627bf215546Sopenharmony_ci 6628bf215546Sopenharmony_ci if (device->instance->debug_flags & RADV_DEBUG_NO_VRS_FLAT_SHADING) 6629bf215546Sopenharmony_ci return false; 6630bf215546Sopenharmony_ci 6631bf215546Sopenharmony_ci if (!ps->info.ps.allow_flat_shading) 6632bf215546Sopenharmony_ci return false; 6633bf215546Sopenharmony_ci 6634bf215546Sopenharmony_ci return true; 6635bf215546Sopenharmony_ci} 6636bf215546Sopenharmony_ci 
6637bf215546Sopenharmony_cistatic void 6638bf215546Sopenharmony_cigfx103_pipeline_emit_vrs_state(struct radeon_cmdbuf *ctx_cs, 6639bf215546Sopenharmony_ci const struct radv_graphics_pipeline *pipeline, 6640bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 6641bf215546Sopenharmony_ci{ 6642bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6643bf215546Sopenharmony_ci uint32_t mode = V_028064_VRS_COMB_MODE_PASSTHRU; 6644bf215546Sopenharmony_ci uint8_t rate_x = 0, rate_y = 0; 6645bf215546Sopenharmony_ci bool enable_vrs = radv_is_vrs_enabled(pipeline, info); 6646bf215546Sopenharmony_ci 6647bf215546Sopenharmony_ci if (!enable_vrs && gfx103_pipeline_vrs_coarse_shading(pipeline)) { 6648bf215546Sopenharmony_ci /* When per-draw VRS is not enabled at all, try enabling VRS coarse shading 2x2 if the driver 6649bf215546Sopenharmony_ci * determined that it's safe to enable. 6650bf215546Sopenharmony_ci */ 6651bf215546Sopenharmony_ci mode = V_028064_VRS_COMB_MODE_OVERRIDE; 6652bf215546Sopenharmony_ci rate_x = rate_y = 1; 6653bf215546Sopenharmony_ci } else if (!radv_is_static_vrs_enabled(pipeline, info) && pipeline->force_vrs_per_vertex && 6654bf215546Sopenharmony_ci get_vs_output_info(pipeline)->writes_primitive_shading_rate) { 6655bf215546Sopenharmony_ci /* Otherwise, if per-draw VRS is not enabled statically, try forcing per-vertex VRS if 6656bf215546Sopenharmony_ci * requested by the user. Note that vkd3d-proton always has to declare VRS as dynamic because 6657bf215546Sopenharmony_ci * in DX12 it's fully dynamic. 
6658bf215546Sopenharmony_ci */ 6659bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028848_PA_CL_VRS_CNTL, 6660bf215546Sopenharmony_ci S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) | 6661bf215546Sopenharmony_ci S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE)); 6662bf215546Sopenharmony_ci 6663bf215546Sopenharmony_ci /* If the shader is using discard, turn off coarse shading because discard at 2x2 pixel 6664bf215546Sopenharmony_ci * granularity degrades quality too much. MIN allows sample shading but not coarse shading. 6665bf215546Sopenharmony_ci */ 6666bf215546Sopenharmony_ci struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; 6667bf215546Sopenharmony_ci 6668bf215546Sopenharmony_ci mode = ps->info.ps.can_discard ? V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU; 6669bf215546Sopenharmony_ci } 6670bf215546Sopenharmony_ci 6671bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX11) { 6672bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, 6673bf215546Sopenharmony_ci S_0283D0_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | 6674bf215546Sopenharmony_ci S_0283D0_VRS_RATE((rate_x << 2) | rate_y)); 6675bf215546Sopenharmony_ci } else { 6676bf215546Sopenharmony_ci radeon_set_context_reg(ctx_cs, R_028064_DB_VRS_OVERRIDE_CNTL, 6677bf215546Sopenharmony_ci S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | 6678bf215546Sopenharmony_ci S_028064_VRS_OVERRIDE_RATE_X(rate_x) | 6679bf215546Sopenharmony_ci S_028064_VRS_OVERRIDE_RATE_Y(rate_y)); 6680bf215546Sopenharmony_ci } 6681bf215546Sopenharmony_ci} 6682bf215546Sopenharmony_ci 6683bf215546Sopenharmony_cistatic void 6684bf215546Sopenharmony_ciradv_pipeline_emit_pm4(struct radv_graphics_pipeline *pipeline, 6685bf215546Sopenharmony_ci const struct radv_blend_state *blend, 6686bf215546Sopenharmony_ci const struct radv_depth_stencil_state *ds_state, 6687bf215546Sopenharmony_ci uint32_t vgt_gs_out_prim_type, 
6688bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 6689bf215546Sopenharmony_ci{ 6690bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6691bf215546Sopenharmony_ci struct radeon_cmdbuf *ctx_cs = &pipeline->base.ctx_cs; 6692bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &pipeline->base.cs; 6693bf215546Sopenharmony_ci 6694bf215546Sopenharmony_ci cs->max_dw = 64; 6695bf215546Sopenharmony_ci ctx_cs->max_dw = 256; 6696bf215546Sopenharmony_ci cs->buf = malloc(4 * (cs->max_dw + ctx_cs->max_dw)); 6697bf215546Sopenharmony_ci ctx_cs->buf = cs->buf + cs->max_dw; 6698bf215546Sopenharmony_ci 6699bf215546Sopenharmony_ci radv_pipeline_emit_depth_stencil_state(ctx_cs, ds_state); 6700bf215546Sopenharmony_ci radv_pipeline_emit_blend_state(ctx_cs, pipeline, blend); 6701bf215546Sopenharmony_ci radv_pipeline_emit_raster_state(ctx_cs, pipeline, info); 6702bf215546Sopenharmony_ci radv_pipeline_emit_multisample_state(ctx_cs, pipeline); 6703bf215546Sopenharmony_ci radv_pipeline_emit_vgt_gs_mode(ctx_cs, pipeline); 6704bf215546Sopenharmony_ci radv_pipeline_emit_vertex_shader(ctx_cs, cs, pipeline); 6705bf215546Sopenharmony_ci radv_pipeline_emit_mesh_shader(ctx_cs, cs, pipeline); 6706bf215546Sopenharmony_ci 6707bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { 6708bf215546Sopenharmony_ci radv_pipeline_emit_tess_shaders(ctx_cs, cs, pipeline); 6709bf215546Sopenharmony_ci radv_pipeline_emit_tess_state(ctx_cs, pipeline, info); 6710bf215546Sopenharmony_ci } 6711bf215546Sopenharmony_ci 6712bf215546Sopenharmony_ci radv_pipeline_emit_geometry_shader(ctx_cs, cs, pipeline); 6713bf215546Sopenharmony_ci radv_pipeline_emit_fragment_shader(ctx_cs, cs, pipeline); 6714bf215546Sopenharmony_ci radv_pipeline_emit_ps_inputs(ctx_cs, pipeline); 6715bf215546Sopenharmony_ci radv_pipeline_emit_vgt_vertex_reuse(ctx_cs, pipeline); 6716bf215546Sopenharmony_ci 
radv_pipeline_emit_vgt_shader_config(ctx_cs, pipeline); 6717bf215546Sopenharmony_ci radv_pipeline_emit_cliprect_rule(ctx_cs, info); 6718bf215546Sopenharmony_ci radv_pipeline_emit_vgt_gs_out(ctx_cs, pipeline, vgt_gs_out_prim_type); 6719bf215546Sopenharmony_ci 6720bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10 && !radv_pipeline_has_ngg(pipeline)) 6721bf215546Sopenharmony_ci gfx10_pipeline_emit_ge_cntl(ctx_cs, pipeline); 6722bf215546Sopenharmony_ci 6723bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10_3) { 6724bf215546Sopenharmony_ci gfx103_pipeline_emit_vgt_draw_payload_cntl(ctx_cs, pipeline, info); 6725bf215546Sopenharmony_ci gfx103_pipeline_emit_vrs_state(ctx_cs, pipeline, info); 6726bf215546Sopenharmony_ci } 6727bf215546Sopenharmony_ci 6728bf215546Sopenharmony_ci pipeline->base.ctx_cs_hash = _mesa_hash_data(ctx_cs->buf, ctx_cs->cdw * 4); 6729bf215546Sopenharmony_ci 6730bf215546Sopenharmony_ci assert(ctx_cs->cdw <= ctx_cs->max_dw); 6731bf215546Sopenharmony_ci assert(cs->cdw <= cs->max_dw); 6732bf215546Sopenharmony_ci} 6733bf215546Sopenharmony_ci 6734bf215546Sopenharmony_cistatic void 6735bf215546Sopenharmony_ciradv_pipeline_init_vertex_input_state(struct radv_graphics_pipeline *pipeline, 6736bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 6737bf215546Sopenharmony_ci{ 6738bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 6739bf215546Sopenharmony_ci const struct radv_shader_info *vs_info = &radv_get_shader(&pipeline->base, MESA_SHADER_VERTEX)->info; 6740bf215546Sopenharmony_ci 6741bf215546Sopenharmony_ci for (uint32_t i = 0; i < MAX_VERTEX_ATTRIBS; i++) { 6742bf215546Sopenharmony_ci pipeline->attrib_ends[i] = info->vi.attrib_ends[i]; 6743bf215546Sopenharmony_ci pipeline->attrib_index_offset[i] = info->vi.attrib_index_offset[i]; 6744bf215546Sopenharmony_ci pipeline->attrib_bindings[i] = info->vi.attrib_bindings[i]; 6745bf215546Sopenharmony_ci } 
6746bf215546Sopenharmony_ci 6747bf215546Sopenharmony_ci for (uint32_t i = 0; i < MAX_VBS; i++) { 6748bf215546Sopenharmony_ci pipeline->binding_stride[i] = info->vi.binding_stride[i]; 6749bf215546Sopenharmony_ci } 6750bf215546Sopenharmony_ci 6751bf215546Sopenharmony_ci pipeline->use_per_attribute_vb_descs = vs_info->vs.use_per_attribute_vb_descs; 6752bf215546Sopenharmony_ci pipeline->last_vertex_attrib_bit = util_last_bit(vs_info->vs.vb_desc_usage_mask); 6753bf215546Sopenharmony_ci if (pipeline->base.shaders[MESA_SHADER_VERTEX]) 6754bf215546Sopenharmony_ci pipeline->next_vertex_stage = MESA_SHADER_VERTEX; 6755bf215546Sopenharmony_ci else if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL]) 6756bf215546Sopenharmony_ci pipeline->next_vertex_stage = MESA_SHADER_TESS_CTRL; 6757bf215546Sopenharmony_ci else 6758bf215546Sopenharmony_ci pipeline->next_vertex_stage = MESA_SHADER_GEOMETRY; 6759bf215546Sopenharmony_ci if (pipeline->next_vertex_stage == MESA_SHADER_VERTEX) { 6760bf215546Sopenharmony_ci const struct radv_shader *vs_shader = pipeline->base.shaders[MESA_SHADER_VERTEX]; 6761bf215546Sopenharmony_ci pipeline->can_use_simple_input = vs_shader->info.is_ngg == pdevice->use_ngg && 6762bf215546Sopenharmony_ci vs_shader->info.wave_size == pdevice->ge_wave_size; 6763bf215546Sopenharmony_ci } else { 6764bf215546Sopenharmony_ci pipeline->can_use_simple_input = false; 6765bf215546Sopenharmony_ci } 6766bf215546Sopenharmony_ci if (vs_info->vs.dynamic_inputs) 6767bf215546Sopenharmony_ci pipeline->vb_desc_usage_mask = BITFIELD_MASK(pipeline->last_vertex_attrib_bit); 6768bf215546Sopenharmony_ci else 6769bf215546Sopenharmony_ci pipeline->vb_desc_usage_mask = vs_info->vs.vb_desc_usage_mask; 6770bf215546Sopenharmony_ci pipeline->vb_desc_alloc_size = util_bitcount(pipeline->vb_desc_usage_mask) * 16; 6771bf215546Sopenharmony_ci} 6772bf215546Sopenharmony_ci 6773bf215546Sopenharmony_cistatic struct radv_shader * 6774bf215546Sopenharmony_ciradv_pipeline_get_streamout_shader(struct 
radv_graphics_pipeline *pipeline) 6775bf215546Sopenharmony_ci{ 6776bf215546Sopenharmony_ci int i; 6777bf215546Sopenharmony_ci 6778bf215546Sopenharmony_ci for (i = MESA_SHADER_GEOMETRY; i >= MESA_SHADER_VERTEX; i--) { 6779bf215546Sopenharmony_ci struct radv_shader *shader = radv_get_shader(&pipeline->base, i); 6780bf215546Sopenharmony_ci 6781bf215546Sopenharmony_ci if (shader && shader->info.so.num_outputs > 0) 6782bf215546Sopenharmony_ci return shader; 6783bf215546Sopenharmony_ci } 6784bf215546Sopenharmony_ci 6785bf215546Sopenharmony_ci return NULL; 6786bf215546Sopenharmony_ci} 6787bf215546Sopenharmony_ci 6788bf215546Sopenharmony_cistatic bool 6789bf215546Sopenharmony_ciradv_shader_need_indirect_descriptor_sets(struct radv_pipeline *pipeline, gl_shader_stage stage) 6790bf215546Sopenharmony_ci{ 6791bf215546Sopenharmony_ci struct radv_userdata_info *loc = 6792bf215546Sopenharmony_ci radv_lookup_user_sgpr(pipeline, stage, AC_UD_INDIRECT_DESCRIPTOR_SETS); 6793bf215546Sopenharmony_ci return loc->sgpr_idx != -1; 6794bf215546Sopenharmony_ci} 6795bf215546Sopenharmony_ci 6796bf215546Sopenharmony_cistatic void 6797bf215546Sopenharmony_ciradv_pipeline_init_shader_stages_state(struct radv_graphics_pipeline *pipeline) 6798bf215546Sopenharmony_ci{ 6799bf215546Sopenharmony_ci struct radv_device *device = pipeline->base.device; 6800bf215546Sopenharmony_ci 6801bf215546Sopenharmony_ci for (unsigned i = 0; i < MESA_VULKAN_SHADER_STAGES; i++) { 6802bf215546Sopenharmony_ci bool shader_exists = !!pipeline->base.shaders[i]; 6803bf215546Sopenharmony_ci if (shader_exists || i < MESA_SHADER_COMPUTE) { 6804bf215546Sopenharmony_ci /* We need this info for some stages even when the shader doesn't exist. 
*/ 6805bf215546Sopenharmony_ci pipeline->base.user_data_0[i] = radv_pipeline_stage_to_user_data_0( 6806bf215546Sopenharmony_ci pipeline, i, device->physical_device->rad_info.gfx_level); 6807bf215546Sopenharmony_ci 6808bf215546Sopenharmony_ci if (shader_exists) 6809bf215546Sopenharmony_ci pipeline->base.need_indirect_descriptor_sets |= 6810bf215546Sopenharmony_ci radv_shader_need_indirect_descriptor_sets(&pipeline->base, i); 6811bf215546Sopenharmony_ci } 6812bf215546Sopenharmony_ci } 6813bf215546Sopenharmony_ci 6814bf215546Sopenharmony_ci gl_shader_stage first_stage = 6815bf215546Sopenharmony_ci radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH) ? MESA_SHADER_MESH : MESA_SHADER_VERTEX; 6816bf215546Sopenharmony_ci 6817bf215546Sopenharmony_ci struct radv_userdata_info *loc = 6818bf215546Sopenharmony_ci radv_lookup_user_sgpr(&pipeline->base, first_stage, AC_UD_VS_BASE_VERTEX_START_INSTANCE); 6819bf215546Sopenharmony_ci if (loc->sgpr_idx != -1) { 6820bf215546Sopenharmony_ci pipeline->vtx_base_sgpr = pipeline->base.user_data_0[first_stage]; 6821bf215546Sopenharmony_ci pipeline->vtx_base_sgpr += loc->sgpr_idx * 4; 6822bf215546Sopenharmony_ci pipeline->vtx_emit_num = loc->num_sgprs; 6823bf215546Sopenharmony_ci pipeline->uses_drawid = 6824bf215546Sopenharmony_ci radv_get_shader(&pipeline->base, first_stage)->info.vs.needs_draw_id; 6825bf215546Sopenharmony_ci pipeline->uses_baseinstance = 6826bf215546Sopenharmony_ci radv_get_shader(&pipeline->base, first_stage)->info.vs.needs_base_instance; 6827bf215546Sopenharmony_ci 6828bf215546Sopenharmony_ci assert(first_stage != MESA_SHADER_MESH || !pipeline->uses_baseinstance); 6829bf215546Sopenharmony_ci } 6830bf215546Sopenharmony_ci} 6831bf215546Sopenharmony_ci 6832bf215546Sopenharmony_cistatic uint32_t 6833bf215546Sopenharmony_ciradv_pipeline_init_vgt_gs_out(struct radv_graphics_pipeline *pipeline, 6834bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info) 6835bf215546Sopenharmony_ci{ 6836bf215546Sopenharmony_ci 
uint32_t gs_out; 6837bf215546Sopenharmony_ci 6838bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) { 6839bf215546Sopenharmony_ci gs_out = 6840bf215546Sopenharmony_ci si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_GEOMETRY]->info.gs.output_prim); 6841bf215546Sopenharmony_ci } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { 6842bf215546Sopenharmony_ci if (pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes.point_mode) { 6843bf215546Sopenharmony_ci gs_out = V_028A6C_POINTLIST; 6844bf215546Sopenharmony_ci } else { 6845bf215546Sopenharmony_ci gs_out = si_conv_tess_prim_to_gs_out( 6846bf215546Sopenharmony_ci pipeline->base.shaders[MESA_SHADER_TESS_EVAL]->info.tes._primitive_mode); 6847bf215546Sopenharmony_ci } 6848bf215546Sopenharmony_ci } else if (radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) { 6849bf215546Sopenharmony_ci gs_out = 6850bf215546Sopenharmony_ci si_conv_gl_prim_to_gs_out(pipeline->base.shaders[MESA_SHADER_MESH]->info.ms.output_prim); 6851bf215546Sopenharmony_ci } else { 6852bf215546Sopenharmony_ci gs_out = si_conv_prim_to_gs_out(info->ia.primitive_topology); 6853bf215546Sopenharmony_ci } 6854bf215546Sopenharmony_ci 6855bf215546Sopenharmony_ci return gs_out; 6856bf215546Sopenharmony_ci} 6857bf215546Sopenharmony_ci 6858bf215546Sopenharmony_cistatic void 6859bf215546Sopenharmony_ciradv_pipeline_init_extra(struct radv_graphics_pipeline *pipeline, 6860bf215546Sopenharmony_ci const struct radv_graphics_pipeline_create_info *extra, 6861bf215546Sopenharmony_ci struct radv_blend_state *blend_state, 6862bf215546Sopenharmony_ci struct radv_depth_stencil_state *ds_state, 6863bf215546Sopenharmony_ci const struct radv_graphics_pipeline_info *info, 6864bf215546Sopenharmony_ci uint32_t *vgt_gs_out_prim_type) 6865bf215546Sopenharmony_ci{ 6866bf215546Sopenharmony_ci if (extra->custom_blend_mode == V_028808_CB_ELIMINATE_FAST_CLEAR || 6867bf215546Sopenharmony_ci extra->custom_blend_mode == 
V_028808_CB_FMASK_DECOMPRESS || 6868bf215546Sopenharmony_ci extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX8 || 6869bf215546Sopenharmony_ci extra->custom_blend_mode == V_028808_CB_DCC_DECOMPRESS_GFX11 || 6870bf215546Sopenharmony_ci extra->custom_blend_mode == V_028808_CB_RESOLVE) { 6871bf215546Sopenharmony_ci /* According to the CB spec states, CB_SHADER_MASK should be set to enable writes to all four 6872bf215546Sopenharmony_ci * channels of MRT0. 6873bf215546Sopenharmony_ci */ 6874bf215546Sopenharmony_ci blend_state->cb_shader_mask = 0xf; 6875bf215546Sopenharmony_ci 6876bf215546Sopenharmony_ci if (extra->custom_blend_mode == V_028808_CB_RESOLVE) 6877bf215546Sopenharmony_ci pipeline->cb_color_control |= S_028808_DISABLE_DUAL_QUAD(1); 6878bf215546Sopenharmony_ci 6879bf215546Sopenharmony_ci pipeline->cb_color_control &= C_028808_MODE; 6880bf215546Sopenharmony_ci pipeline->cb_color_control |= S_028808_MODE(extra->custom_blend_mode); 6881bf215546Sopenharmony_ci } 6882bf215546Sopenharmony_ci 6883bf215546Sopenharmony_ci if (extra->use_rectlist) { 6884bf215546Sopenharmony_ci struct radv_dynamic_state *dynamic = &pipeline->dynamic_state; 6885bf215546Sopenharmony_ci dynamic->primitive_topology = V_008958_DI_PT_RECTLIST; 6886bf215546Sopenharmony_ci 6887bf215546Sopenharmony_ci *vgt_gs_out_prim_type = V_028A6C_TRISTRIP; 6888bf215546Sopenharmony_ci if (radv_pipeline_has_ngg(pipeline)) 6889bf215546Sopenharmony_ci *vgt_gs_out_prim_type = V_028A6C_RECTLIST; 6890bf215546Sopenharmony_ci 6891bf215546Sopenharmony_ci pipeline->rast_prim = *vgt_gs_out_prim_type; 6892bf215546Sopenharmony_ci } 6893bf215546Sopenharmony_ci 6894bf215546Sopenharmony_ci if (radv_pipeline_has_ds_attachments(&info->ri)) { 6895bf215546Sopenharmony_ci ds_state->db_render_control |= S_028000_DEPTH_CLEAR_ENABLE(extra->db_depth_clear); 6896bf215546Sopenharmony_ci ds_state->db_render_control |= S_028000_STENCIL_CLEAR_ENABLE(extra->db_stencil_clear); 6897bf215546Sopenharmony_ci ds_state->db_render_control 
|= S_028000_RESUMMARIZE_ENABLE(extra->resummarize_enable); 6898bf215546Sopenharmony_ci ds_state->db_render_control |= S_028000_DEPTH_COMPRESS_DISABLE(extra->depth_compress_disable); 6899bf215546Sopenharmony_ci ds_state->db_render_control |= S_028000_STENCIL_COMPRESS_DISABLE(extra->stencil_compress_disable); 6900bf215546Sopenharmony_ci } 6901bf215546Sopenharmony_ci} 6902bf215546Sopenharmony_ci 6903bf215546Sopenharmony_civoid 6904bf215546Sopenharmony_ciradv_pipeline_init(struct radv_device *device, struct radv_pipeline *pipeline, 6905bf215546Sopenharmony_ci enum radv_pipeline_type type) 6906bf215546Sopenharmony_ci{ 6907bf215546Sopenharmony_ci vk_object_base_init(&device->vk, &pipeline->base, VK_OBJECT_TYPE_PIPELINE); 6908bf215546Sopenharmony_ci 6909bf215546Sopenharmony_ci pipeline->device = device; 6910bf215546Sopenharmony_ci pipeline->type = type; 6911bf215546Sopenharmony_ci} 6912bf215546Sopenharmony_ci 6913bf215546Sopenharmony_cistatic VkResult 6914bf215546Sopenharmony_ciradv_graphics_pipeline_init(struct radv_graphics_pipeline *pipeline, struct radv_device *device, 6915bf215546Sopenharmony_ci struct radv_pipeline_cache *cache, 6916bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 6917bf215546Sopenharmony_ci const struct radv_graphics_pipeline_create_info *extra) 6918bf215546Sopenharmony_ci{ 6919bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout); 6920bf215546Sopenharmony_ci VkResult result; 6921bf215546Sopenharmony_ci 6922bf215546Sopenharmony_ci pipeline->last_vgt_api_stage = MESA_SHADER_NONE; 6923bf215546Sopenharmony_ci 6924bf215546Sopenharmony_ci /* Mark all states declared dynamic at pipeline creation. 
*/ 6925bf215546Sopenharmony_ci if (pCreateInfo->pDynamicState) { 6926bf215546Sopenharmony_ci uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; 6927bf215546Sopenharmony_ci for (uint32_t s = 0; s < count; s++) { 6928bf215546Sopenharmony_ci pipeline->dynamic_states |= 6929bf215546Sopenharmony_ci radv_dynamic_state_mask(pCreateInfo->pDynamicState->pDynamicStates[s]); 6930bf215546Sopenharmony_ci } 6931bf215546Sopenharmony_ci } 6932bf215546Sopenharmony_ci 6933bf215546Sopenharmony_ci /* Mark all active stages at pipeline creation. */ 6934bf215546Sopenharmony_ci for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { 6935bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i]; 6936bf215546Sopenharmony_ci 6937bf215546Sopenharmony_ci pipeline->active_stages |= sinfo->stage; 6938bf215546Sopenharmony_ci } 6939bf215546Sopenharmony_ci 6940bf215546Sopenharmony_ci struct radv_graphics_pipeline_info info = radv_pipeline_init_graphics_info(pipeline, pCreateInfo); 6941bf215546Sopenharmony_ci 6942bf215546Sopenharmony_ci struct radv_blend_state blend = radv_pipeline_init_blend_state(pipeline, pCreateInfo, &info); 6943bf215546Sopenharmony_ci 6944bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *creation_feedback = 6945bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); 6946bf215546Sopenharmony_ci 6947bf215546Sopenharmony_ci struct radv_pipeline_key key = 6948bf215546Sopenharmony_ci radv_generate_graphics_pipeline_key(pipeline, pCreateInfo, &info, &blend); 6949bf215546Sopenharmony_ci 6950bf215546Sopenharmony_ci result = radv_create_shaders(&pipeline->base, pipeline_layout, device, cache, &key, pCreateInfo->pStages, 6951bf215546Sopenharmony_ci pCreateInfo->stageCount, pCreateInfo->flags, NULL, 6952bf215546Sopenharmony_ci creation_feedback, NULL, NULL, &pipeline->last_vgt_api_stage); 6953bf215546Sopenharmony_ci if (result != VK_SUCCESS) 6954bf215546Sopenharmony_ci 
return result; 6955bf215546Sopenharmony_ci 6956bf215546Sopenharmony_ci pipeline->spi_baryc_cntl = S_0286E0_FRONT_FACE_ALL_BITS(1); 6957bf215546Sopenharmony_ci 6958bf215546Sopenharmony_ci uint32_t vgt_gs_out_prim_type = radv_pipeline_init_vgt_gs_out(pipeline, &info); 6959bf215546Sopenharmony_ci 6960bf215546Sopenharmony_ci radv_pipeline_init_multisample_state(pipeline, &blend, &info, vgt_gs_out_prim_type); 6961bf215546Sopenharmony_ci 6962bf215546Sopenharmony_ci if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) 6963bf215546Sopenharmony_ci radv_pipeline_init_input_assembly_state(pipeline, &info); 6964bf215546Sopenharmony_ci radv_pipeline_init_dynamic_state(pipeline, &info); 6965bf215546Sopenharmony_ci 6966bf215546Sopenharmony_ci pipeline->negative_one_to_one = info.vp.negative_one_to_one; 6967bf215546Sopenharmony_ci 6968bf215546Sopenharmony_ci radv_pipeline_init_raster_state(pipeline, &info); 6969bf215546Sopenharmony_ci 6970bf215546Sopenharmony_ci struct radv_depth_stencil_state ds_state = 6971bf215546Sopenharmony_ci radv_pipeline_init_depth_stencil_state(pipeline, &info); 6972bf215546Sopenharmony_ci 6973bf215546Sopenharmony_ci if (device->physical_device->rad_info.gfx_level >= GFX10_3) 6974bf215546Sopenharmony_ci gfx103_pipeline_init_vrs_state(pipeline, &info); 6975bf215546Sopenharmony_ci 6976bf215546Sopenharmony_ci /* Ensure that some export memory is always allocated, for two reasons: 6977bf215546Sopenharmony_ci * 6978bf215546Sopenharmony_ci * 1) Correctness: The hardware ignores the EXEC mask if no export 6979bf215546Sopenharmony_ci * memory is allocated, so KILL and alpha test do not work correctly 6980bf215546Sopenharmony_ci * without this. 6981bf215546Sopenharmony_ci * 2) Performance: Every shader needs at least a NULL export, even when 6982bf215546Sopenharmony_ci * it writes no color/depth output. The NULL export instruction 6983bf215546Sopenharmony_ci * stalls without this setting. 
6984bf215546Sopenharmony_ci * 6985bf215546Sopenharmony_ci * Don't add this to CB_SHADER_MASK. 6986bf215546Sopenharmony_ci * 6987bf215546Sopenharmony_ci * GFX10 supports pixel shaders without exports by setting both the 6988bf215546Sopenharmony_ci * color and Z formats to SPI_SHADER_ZERO. The hw will skip export 6989bf215546Sopenharmony_ci * instructions if any are present. 6990bf215546Sopenharmony_ci */ 6991bf215546Sopenharmony_ci struct radv_shader *ps = pipeline->base.shaders[MESA_SHADER_FRAGMENT]; 6992bf215546Sopenharmony_ci if ((device->physical_device->rad_info.gfx_level <= GFX9 || ps->info.ps.can_discard) && 6993bf215546Sopenharmony_ci !blend.spi_shader_col_format) { 6994bf215546Sopenharmony_ci if (!ps->info.ps.writes_z && !ps->info.ps.writes_stencil && !ps->info.ps.writes_sample_mask) 6995bf215546Sopenharmony_ci blend.spi_shader_col_format = V_028714_SPI_SHADER_32_R; 6996bf215546Sopenharmony_ci } 6997bf215546Sopenharmony_ci 6998bf215546Sopenharmony_ci pipeline->col_format = blend.spi_shader_col_format; 6999bf215546Sopenharmony_ci pipeline->cb_target_mask = blend.cb_target_mask; 7000bf215546Sopenharmony_ci 7001bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY) && !radv_pipeline_has_ngg(pipeline)) { 7002bf215546Sopenharmony_ci struct radv_shader *gs = pipeline->base.shaders[MESA_SHADER_GEOMETRY]; 7003bf215546Sopenharmony_ci 7004bf215546Sopenharmony_ci radv_pipeline_init_gs_ring_state(pipeline, &gs->info.gs_ring_info); 7005bf215546Sopenharmony_ci } 7006bf215546Sopenharmony_ci 7007bf215546Sopenharmony_ci if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL)) { 7008bf215546Sopenharmony_ci pipeline->tess_patch_control_points = info.ts.patch_control_points; 7009bf215546Sopenharmony_ci } 7010bf215546Sopenharmony_ci 7011bf215546Sopenharmony_ci if (!radv_pipeline_has_stage(pipeline, MESA_SHADER_MESH)) 7012bf215546Sopenharmony_ci radv_pipeline_init_vertex_input_state(pipeline, &info); 7013bf215546Sopenharmony_ci 
7014bf215546Sopenharmony_ci radv_pipeline_init_binning_state(pipeline, &blend, &info); 7015bf215546Sopenharmony_ci radv_pipeline_init_shader_stages_state(pipeline); 7016bf215546Sopenharmony_ci radv_pipeline_init_scratch(device, &pipeline->base); 7017bf215546Sopenharmony_ci 7018bf215546Sopenharmony_ci /* Find the last vertex shader stage that eventually uses streamout. */ 7019bf215546Sopenharmony_ci pipeline->streamout_shader = radv_pipeline_get_streamout_shader(pipeline); 7020bf215546Sopenharmony_ci 7021bf215546Sopenharmony_ci pipeline->is_ngg = radv_pipeline_has_ngg(pipeline); 7022bf215546Sopenharmony_ci pipeline->has_ngg_culling = 7023bf215546Sopenharmony_ci pipeline->is_ngg && 7024bf215546Sopenharmony_ci pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.has_ngg_culling; 7025bf215546Sopenharmony_ci pipeline->force_vrs_per_vertex = 7026bf215546Sopenharmony_ci pipeline->base.shaders[pipeline->last_vgt_api_stage]->info.force_vrs_per_vertex; 7027bf215546Sopenharmony_ci pipeline->uses_user_sample_locations = info.ms.sample_locs_enable; 7028bf215546Sopenharmony_ci pipeline->rast_prim = vgt_gs_out_prim_type; 7029bf215546Sopenharmony_ci 7030bf215546Sopenharmony_ci if (!(pipeline->dynamic_states & RADV_DYNAMIC_LINE_WIDTH)) { 7031bf215546Sopenharmony_ci pipeline->line_width = info.rs.line_width; 7032bf215546Sopenharmony_ci } 7033bf215546Sopenharmony_ci 7034bf215546Sopenharmony_ci pipeline->base.push_constant_size = pipeline_layout->push_constant_size; 7035bf215546Sopenharmony_ci pipeline->base.dynamic_offset_count = pipeline_layout->dynamic_offset_count; 7036bf215546Sopenharmony_ci 7037bf215546Sopenharmony_ci if (extra) { 7038bf215546Sopenharmony_ci radv_pipeline_init_extra(pipeline, extra, &blend, &ds_state, &info, &vgt_gs_out_prim_type); 7039bf215546Sopenharmony_ci } 7040bf215546Sopenharmony_ci 7041bf215546Sopenharmony_ci radv_pipeline_emit_pm4(pipeline, &blend, &ds_state, vgt_gs_out_prim_type, &info); 7042bf215546Sopenharmony_ci 7043bf215546Sopenharmony_ci 
return result; 7044bf215546Sopenharmony_ci} 7045bf215546Sopenharmony_ci 7046bf215546Sopenharmony_cistatic VkResult 7047bf215546Sopenharmony_ciradv_graphics_pipeline_create_nonlegacy(VkDevice _device, VkPipelineCache _cache, 7048bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 7049bf215546Sopenharmony_ci const struct radv_graphics_pipeline_create_info *extra, 7050bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 7051bf215546Sopenharmony_ci VkPipeline *pPipeline) 7052bf215546Sopenharmony_ci{ 7053bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 7054bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache); 7055bf215546Sopenharmony_ci struct radv_graphics_pipeline *pipeline; 7056bf215546Sopenharmony_ci VkResult result; 7057bf215546Sopenharmony_ci 7058bf215546Sopenharmony_ci pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 7059bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 7060bf215546Sopenharmony_ci if (pipeline == NULL) 7061bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 7062bf215546Sopenharmony_ci 7063bf215546Sopenharmony_ci radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_GRAPHICS); 7064bf215546Sopenharmony_ci 7065bf215546Sopenharmony_ci result = radv_graphics_pipeline_init(pipeline, device, cache, pCreateInfo, extra); 7066bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 7067bf215546Sopenharmony_ci radv_pipeline_destroy(device, &pipeline->base, pAllocator); 7068bf215546Sopenharmony_ci return result; 7069bf215546Sopenharmony_ci } 7070bf215546Sopenharmony_ci 7071bf215546Sopenharmony_ci *pPipeline = radv_pipeline_to_handle(&pipeline->base); 7072bf215546Sopenharmony_ci 7073bf215546Sopenharmony_ci return VK_SUCCESS; 7074bf215546Sopenharmony_ci} 7075bf215546Sopenharmony_ci 7076bf215546Sopenharmony_ci/* This is a wrapper for radv_graphics_pipeline_create_nonlegacy that does all legacy conversions 
7077bf215546Sopenharmony_ci * for the VkGraphicsPipelineCreateInfo data. */ 7078bf215546Sopenharmony_ciVkResult 7079bf215546Sopenharmony_ciradv_graphics_pipeline_create(VkDevice _device, VkPipelineCache _cache, 7080bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 7081bf215546Sopenharmony_ci const struct radv_graphics_pipeline_create_info *extra, 7082bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, VkPipeline *pPipeline) 7083bf215546Sopenharmony_ci{ 7084bf215546Sopenharmony_ci VkGraphicsPipelineCreateInfo create_info = *pCreateInfo; 7085bf215546Sopenharmony_ci 7086bf215546Sopenharmony_ci VkPipelineRenderingCreateInfo rendering_create_info; 7087bf215546Sopenharmony_ci VkFormat color_formats[MAX_RTS]; 7088bf215546Sopenharmony_ci VkAttachmentSampleCountInfoAMD sample_info; 7089bf215546Sopenharmony_ci VkSampleCountFlagBits samples[MAX_RTS]; 7090bf215546Sopenharmony_ci if (pCreateInfo->renderPass != VK_NULL_HANDLE) { 7091bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_render_pass, pass, pCreateInfo->renderPass); 7092bf215546Sopenharmony_ci struct radv_subpass *subpass = pass->subpasses + pCreateInfo->subpass; 7093bf215546Sopenharmony_ci 7094bf215546Sopenharmony_ci rendering_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO; 7095bf215546Sopenharmony_ci rendering_create_info.pNext = create_info.pNext; 7096bf215546Sopenharmony_ci create_info.pNext = &rendering_create_info; 7097bf215546Sopenharmony_ci 7098bf215546Sopenharmony_ci rendering_create_info.viewMask = subpass->view_mask; 7099bf215546Sopenharmony_ci 7100bf215546Sopenharmony_ci VkFormat ds_format = 7101bf215546Sopenharmony_ci subpass->depth_stencil_attachment 7102bf215546Sopenharmony_ci ? 
pass->attachments[subpass->depth_stencil_attachment->attachment].format 7103bf215546Sopenharmony_ci : VK_FORMAT_UNDEFINED; 7104bf215546Sopenharmony_ci 7105bf215546Sopenharmony_ci rendering_create_info.depthAttachmentFormat = 7106bf215546Sopenharmony_ci vk_format_has_depth(ds_format) ? ds_format : VK_FORMAT_UNDEFINED; 7107bf215546Sopenharmony_ci rendering_create_info.stencilAttachmentFormat = 7108bf215546Sopenharmony_ci vk_format_has_stencil(ds_format) ? ds_format : VK_FORMAT_UNDEFINED; 7109bf215546Sopenharmony_ci 7110bf215546Sopenharmony_ci rendering_create_info.colorAttachmentCount = subpass->color_count; 7111bf215546Sopenharmony_ci rendering_create_info.pColorAttachmentFormats = color_formats; 7112bf215546Sopenharmony_ci for (unsigned i = 0; i < rendering_create_info.colorAttachmentCount; ++i) { 7113bf215546Sopenharmony_ci if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) 7114bf215546Sopenharmony_ci color_formats[i] = pass->attachments[subpass->color_attachments[i].attachment].format; 7115bf215546Sopenharmony_ci else 7116bf215546Sopenharmony_ci color_formats[i] = VK_FORMAT_UNDEFINED; 7117bf215546Sopenharmony_ci } 7118bf215546Sopenharmony_ci 7119bf215546Sopenharmony_ci create_info.renderPass = VK_NULL_HANDLE; 7120bf215546Sopenharmony_ci 7121bf215546Sopenharmony_ci sample_info.sType = VK_STRUCTURE_TYPE_ATTACHMENT_SAMPLE_COUNT_INFO_AMD; 7122bf215546Sopenharmony_ci sample_info.pNext = create_info.pNext; 7123bf215546Sopenharmony_ci create_info.pNext = &sample_info; 7124bf215546Sopenharmony_ci 7125bf215546Sopenharmony_ci sample_info.colorAttachmentCount = rendering_create_info.colorAttachmentCount; 7126bf215546Sopenharmony_ci sample_info.pColorAttachmentSamples = samples; 7127bf215546Sopenharmony_ci for (unsigned i = 0; i < sample_info.colorAttachmentCount; ++i) { 7128bf215546Sopenharmony_ci if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) { 7129bf215546Sopenharmony_ci samples[i] = 
pass->attachments[subpass->color_attachments[i].attachment].samples; 7130bf215546Sopenharmony_ci } else 7131bf215546Sopenharmony_ci samples[i] = 1; 7132bf215546Sopenharmony_ci } 7133bf215546Sopenharmony_ci sample_info.depthStencilAttachmentSamples = subpass->depth_sample_count; 7134bf215546Sopenharmony_ci } 7135bf215546Sopenharmony_ci 7136bf215546Sopenharmony_ci return radv_graphics_pipeline_create_nonlegacy(_device, _cache, &create_info, extra, pAllocator, 7137bf215546Sopenharmony_ci pPipeline); 7138bf215546Sopenharmony_ci} 7139bf215546Sopenharmony_ci 7140bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 7141bf215546Sopenharmony_ciradv_CreateGraphicsPipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, 7142bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfos, 7143bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) 7144bf215546Sopenharmony_ci{ 7145bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 7146bf215546Sopenharmony_ci unsigned i = 0; 7147bf215546Sopenharmony_ci 7148bf215546Sopenharmony_ci for (; i < count; i++) { 7149bf215546Sopenharmony_ci VkResult r; 7150bf215546Sopenharmony_ci r = radv_graphics_pipeline_create(_device, pipelineCache, &pCreateInfos[i], NULL, pAllocator, 7151bf215546Sopenharmony_ci &pPipelines[i]); 7152bf215546Sopenharmony_ci if (r != VK_SUCCESS) { 7153bf215546Sopenharmony_ci result = r; 7154bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 7155bf215546Sopenharmony_ci 7156bf215546Sopenharmony_ci if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 7157bf215546Sopenharmony_ci break; 7158bf215546Sopenharmony_ci } 7159bf215546Sopenharmony_ci } 7160bf215546Sopenharmony_ci 7161bf215546Sopenharmony_ci for (; i < count; ++i) 7162bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 7163bf215546Sopenharmony_ci 7164bf215546Sopenharmony_ci return result; 7165bf215546Sopenharmony_ci} 7166bf215546Sopenharmony_ci 
7167bf215546Sopenharmony_civoid 7168bf215546Sopenharmony_ciradv_pipeline_emit_hw_cs(const struct radv_physical_device *pdevice, struct radeon_cmdbuf *cs, 7169bf215546Sopenharmony_ci const struct radv_shader *shader) 7170bf215546Sopenharmony_ci{ 7171bf215546Sopenharmony_ci uint64_t va = radv_shader_get_va(shader); 7172bf215546Sopenharmony_ci 7173bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B830_COMPUTE_PGM_LO, va >> 8); 7174bf215546Sopenharmony_ci 7175bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B848_COMPUTE_PGM_RSRC1, 2); 7176bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc1); 7177bf215546Sopenharmony_ci radeon_emit(cs, shader->config.rsrc2); 7178bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10) { 7179bf215546Sopenharmony_ci radeon_set_sh_reg(cs, R_00B8A0_COMPUTE_PGM_RSRC3, shader->config.rsrc3); 7180bf215546Sopenharmony_ci } 7181bf215546Sopenharmony_ci} 7182bf215546Sopenharmony_ci 7183bf215546Sopenharmony_civoid 7184bf215546Sopenharmony_ciradv_pipeline_emit_compute_state(const struct radv_physical_device *pdevice, 7185bf215546Sopenharmony_ci struct radeon_cmdbuf *cs, const struct radv_shader *shader) 7186bf215546Sopenharmony_ci{ 7187bf215546Sopenharmony_ci unsigned threads_per_threadgroup; 7188bf215546Sopenharmony_ci unsigned threadgroups_per_cu = 1; 7189bf215546Sopenharmony_ci unsigned waves_per_threadgroup; 7190bf215546Sopenharmony_ci unsigned max_waves_per_sh = 0; 7191bf215546Sopenharmony_ci 7192bf215546Sopenharmony_ci /* Calculate best compute resource limits. 
*/ 7193bf215546Sopenharmony_ci threads_per_threadgroup = 7194bf215546Sopenharmony_ci shader->info.cs.block_size[0] * shader->info.cs.block_size[1] * shader->info.cs.block_size[2]; 7195bf215546Sopenharmony_ci waves_per_threadgroup = DIV_ROUND_UP(threads_per_threadgroup, shader->info.wave_size); 7196bf215546Sopenharmony_ci 7197bf215546Sopenharmony_ci if (pdevice->rad_info.gfx_level >= GFX10 && waves_per_threadgroup == 1) 7198bf215546Sopenharmony_ci threadgroups_per_cu = 2; 7199bf215546Sopenharmony_ci 7200bf215546Sopenharmony_ci radeon_set_sh_reg( 7201bf215546Sopenharmony_ci cs, R_00B854_COMPUTE_RESOURCE_LIMITS, 7202bf215546Sopenharmony_ci ac_get_compute_resource_limits(&pdevice->rad_info, waves_per_threadgroup, 7203bf215546Sopenharmony_ci max_waves_per_sh, threadgroups_per_cu)); 7204bf215546Sopenharmony_ci 7205bf215546Sopenharmony_ci radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3); 7206bf215546Sopenharmony_ci radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[0])); 7207bf215546Sopenharmony_ci radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[1])); 7208bf215546Sopenharmony_ci radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(shader->info.cs.block_size[2])); 7209bf215546Sopenharmony_ci} 7210bf215546Sopenharmony_ci 7211bf215546Sopenharmony_cistatic void 7212bf215546Sopenharmony_ciradv_compute_generate_pm4(struct radv_compute_pipeline *pipeline) 7213bf215546Sopenharmony_ci{ 7214bf215546Sopenharmony_ci struct radv_physical_device *pdevice = pipeline->base.device->physical_device; 7215bf215546Sopenharmony_ci struct radv_shader *shader = pipeline->base.shaders[MESA_SHADER_COMPUTE]; 7216bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &pipeline->base.cs; 7217bf215546Sopenharmony_ci 7218bf215546Sopenharmony_ci cs->max_dw = pdevice->rad_info.gfx_level >= GFX10 ? 
19 : 16; 7219bf215546Sopenharmony_ci cs->buf = malloc(cs->max_dw * 4); 7220bf215546Sopenharmony_ci 7221bf215546Sopenharmony_ci radv_pipeline_emit_hw_cs(pdevice, cs, shader); 7222bf215546Sopenharmony_ci radv_pipeline_emit_compute_state(pdevice, cs, shader); 7223bf215546Sopenharmony_ci 7224bf215546Sopenharmony_ci assert(pipeline->base.cs.cdw <= pipeline->base.cs.max_dw); 7225bf215546Sopenharmony_ci} 7226bf215546Sopenharmony_ci 7227bf215546Sopenharmony_cistatic struct radv_pipeline_key 7228bf215546Sopenharmony_ciradv_generate_compute_pipeline_key(struct radv_compute_pipeline *pipeline, 7229bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *pCreateInfo) 7230bf215546Sopenharmony_ci{ 7231bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stage = &pCreateInfo->stage; 7232bf215546Sopenharmony_ci struct radv_pipeline_key key = radv_generate_pipeline_key(&pipeline->base, pCreateInfo->flags); 7233bf215546Sopenharmony_ci 7234bf215546Sopenharmony_ci const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *subgroup_size = 7235bf215546Sopenharmony_ci vk_find_struct_const(stage->pNext, 7236bf215546Sopenharmony_ci PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO); 7237bf215546Sopenharmony_ci 7238bf215546Sopenharmony_ci if (subgroup_size) { 7239bf215546Sopenharmony_ci assert(subgroup_size->requiredSubgroupSize == 32 || 7240bf215546Sopenharmony_ci subgroup_size->requiredSubgroupSize == 64); 7241bf215546Sopenharmony_ci key.cs.compute_subgroup_size = subgroup_size->requiredSubgroupSize; 7242bf215546Sopenharmony_ci } else if (stage->flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT) { 7243bf215546Sopenharmony_ci key.cs.require_full_subgroups = true; 7244bf215546Sopenharmony_ci } 7245bf215546Sopenharmony_ci 7246bf215546Sopenharmony_ci return key; 7247bf215546Sopenharmony_ci} 7248bf215546Sopenharmony_ci 7249bf215546Sopenharmony_ciVkResult 7250bf215546Sopenharmony_ciradv_compute_pipeline_create(VkDevice _device, VkPipelineCache _cache, 
7251bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *pCreateInfo, 7252bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, const uint8_t *custom_hash, 7253bf215546Sopenharmony_ci struct radv_pipeline_shader_stack_size *rt_stack_sizes, 7254bf215546Sopenharmony_ci uint32_t rt_group_count, VkPipeline *pPipeline) 7255bf215546Sopenharmony_ci{ 7256bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 7257bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache); 7258bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline_layout, pipeline_layout, pCreateInfo->layout); 7259bf215546Sopenharmony_ci struct radv_compute_pipeline *pipeline; 7260bf215546Sopenharmony_ci VkResult result; 7261bf215546Sopenharmony_ci 7262bf215546Sopenharmony_ci pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8, 7263bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 7264bf215546Sopenharmony_ci if (pipeline == NULL) { 7265bf215546Sopenharmony_ci free(rt_stack_sizes); 7266bf215546Sopenharmony_ci return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY); 7267bf215546Sopenharmony_ci } 7268bf215546Sopenharmony_ci 7269bf215546Sopenharmony_ci radv_pipeline_init(device, &pipeline->base, RADV_PIPELINE_COMPUTE); 7270bf215546Sopenharmony_ci 7271bf215546Sopenharmony_ci pipeline->rt_stack_sizes = rt_stack_sizes; 7272bf215546Sopenharmony_ci pipeline->group_count = rt_group_count; 7273bf215546Sopenharmony_ci 7274bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *creation_feedback = 7275bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); 7276bf215546Sopenharmony_ci 7277bf215546Sopenharmony_ci struct radv_pipeline_key key = radv_generate_compute_pipeline_key(pipeline, pCreateInfo); 7278bf215546Sopenharmony_ci 7279bf215546Sopenharmony_ci UNUSED gl_shader_stage last_vgt_api_stage = MESA_SHADER_NONE; 7280bf215546Sopenharmony_ci result = 
radv_create_shaders(&pipeline->base, pipeline_layout, device, cache, &key, &pCreateInfo->stage, 7281bf215546Sopenharmony_ci 1, pCreateInfo->flags, custom_hash, creation_feedback, 7282bf215546Sopenharmony_ci &pipeline->rt_stack_sizes, &pipeline->group_count, 7283bf215546Sopenharmony_ci &last_vgt_api_stage); 7284bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 7285bf215546Sopenharmony_ci radv_pipeline_destroy(device, &pipeline->base, pAllocator); 7286bf215546Sopenharmony_ci return result; 7287bf215546Sopenharmony_ci } 7288bf215546Sopenharmony_ci 7289bf215546Sopenharmony_ci pipeline->base.user_data_0[MESA_SHADER_COMPUTE] = R_00B900_COMPUTE_USER_DATA_0; 7290bf215546Sopenharmony_ci pipeline->base.need_indirect_descriptor_sets |= 7291bf215546Sopenharmony_ci radv_shader_need_indirect_descriptor_sets(&pipeline->base, MESA_SHADER_COMPUTE); 7292bf215546Sopenharmony_ci radv_pipeline_init_scratch(device, &pipeline->base); 7293bf215546Sopenharmony_ci 7294bf215546Sopenharmony_ci pipeline->base.push_constant_size = pipeline_layout->push_constant_size; 7295bf215546Sopenharmony_ci pipeline->base.dynamic_offset_count = pipeline_layout->dynamic_offset_count; 7296bf215546Sopenharmony_ci 7297bf215546Sopenharmony_ci if (device->physical_device->rad_info.has_cs_regalloc_hang_bug) { 7298bf215546Sopenharmony_ci struct radv_shader *compute_shader = pipeline->base.shaders[MESA_SHADER_COMPUTE]; 7299bf215546Sopenharmony_ci unsigned *cs_block_size = compute_shader->info.cs.block_size; 7300bf215546Sopenharmony_ci 7301bf215546Sopenharmony_ci pipeline->cs_regalloc_hang_bug = cs_block_size[0] * cs_block_size[1] * cs_block_size[2] > 256; 7302bf215546Sopenharmony_ci } 7303bf215546Sopenharmony_ci 7304bf215546Sopenharmony_ci radv_compute_generate_pm4(pipeline); 7305bf215546Sopenharmony_ci 7306bf215546Sopenharmony_ci *pPipeline = radv_pipeline_to_handle(&pipeline->base); 7307bf215546Sopenharmony_ci 7308bf215546Sopenharmony_ci return VK_SUCCESS; 7309bf215546Sopenharmony_ci} 7310bf215546Sopenharmony_ci 
7311bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 7312bf215546Sopenharmony_ciradv_CreateComputePipelines(VkDevice _device, VkPipelineCache pipelineCache, uint32_t count, 7313bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *pCreateInfos, 7314bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines) 7315bf215546Sopenharmony_ci{ 7316bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 7317bf215546Sopenharmony_ci 7318bf215546Sopenharmony_ci unsigned i = 0; 7319bf215546Sopenharmony_ci for (; i < count; i++) { 7320bf215546Sopenharmony_ci VkResult r; 7321bf215546Sopenharmony_ci r = radv_compute_pipeline_create(_device, pipelineCache, &pCreateInfos[i], pAllocator, NULL, 7322bf215546Sopenharmony_ci NULL, 0, &pPipelines[i]); 7323bf215546Sopenharmony_ci if (r != VK_SUCCESS) { 7324bf215546Sopenharmony_ci result = r; 7325bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 7326bf215546Sopenharmony_ci 7327bf215546Sopenharmony_ci if (pCreateInfos[i].flags & VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 7328bf215546Sopenharmony_ci break; 7329bf215546Sopenharmony_ci } 7330bf215546Sopenharmony_ci } 7331bf215546Sopenharmony_ci 7332bf215546Sopenharmony_ci for (; i < count; ++i) 7333bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 7334bf215546Sopenharmony_ci 7335bf215546Sopenharmony_ci return result; 7336bf215546Sopenharmony_ci} 7337bf215546Sopenharmony_ci 7338bf215546Sopenharmony_cistatic uint32_t 7339bf215546Sopenharmony_ciradv_get_executable_count(struct radv_pipeline *pipeline) 7340bf215546Sopenharmony_ci{ 7341bf215546Sopenharmony_ci uint32_t ret = 0; 7342bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 7343bf215546Sopenharmony_ci if (!pipeline->shaders[i]) 7344bf215546Sopenharmony_ci continue; 7345bf215546Sopenharmony_ci 7346bf215546Sopenharmony_ci if (i == MESA_SHADER_GEOMETRY && 7347bf215546Sopenharmony_ci !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) { 
7348bf215546Sopenharmony_ci ret += 2u; 7349bf215546Sopenharmony_ci } else { 7350bf215546Sopenharmony_ci ret += 1u; 7351bf215546Sopenharmony_ci } 7352bf215546Sopenharmony_ci } 7353bf215546Sopenharmony_ci return ret; 7354bf215546Sopenharmony_ci} 7355bf215546Sopenharmony_ci 7356bf215546Sopenharmony_cistatic struct radv_shader * 7357bf215546Sopenharmony_ciradv_get_shader_from_executable_index(struct radv_pipeline *pipeline, int index, 7358bf215546Sopenharmony_ci gl_shader_stage *stage) 7359bf215546Sopenharmony_ci{ 7360bf215546Sopenharmony_ci for (int i = 0; i < MESA_VULKAN_SHADER_STAGES; ++i) { 7361bf215546Sopenharmony_ci if (!pipeline->shaders[i]) 7362bf215546Sopenharmony_ci continue; 7363bf215546Sopenharmony_ci if (!index) { 7364bf215546Sopenharmony_ci *stage = i; 7365bf215546Sopenharmony_ci return pipeline->shaders[i]; 7366bf215546Sopenharmony_ci } 7367bf215546Sopenharmony_ci 7368bf215546Sopenharmony_ci --index; 7369bf215546Sopenharmony_ci 7370bf215546Sopenharmony_ci if (i == MESA_SHADER_GEOMETRY && 7371bf215546Sopenharmony_ci !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) { 7372bf215546Sopenharmony_ci if (!index) { 7373bf215546Sopenharmony_ci *stage = i; 7374bf215546Sopenharmony_ci return pipeline->gs_copy_shader; 7375bf215546Sopenharmony_ci } 7376bf215546Sopenharmony_ci --index; 7377bf215546Sopenharmony_ci } 7378bf215546Sopenharmony_ci } 7379bf215546Sopenharmony_ci 7380bf215546Sopenharmony_ci *stage = -1; 7381bf215546Sopenharmony_ci return NULL; 7382bf215546Sopenharmony_ci} 7383bf215546Sopenharmony_ci 7384bf215546Sopenharmony_ci/* Basically strlcpy (which does not exist on linux) specialized for 7385bf215546Sopenharmony_ci * descriptions. 
*/ 7386bf215546Sopenharmony_cistatic void 7387bf215546Sopenharmony_cidesc_copy(char *desc, const char *src) 7388bf215546Sopenharmony_ci{ 7389bf215546Sopenharmony_ci int len = strlen(src); 7390bf215546Sopenharmony_ci assert(len < VK_MAX_DESCRIPTION_SIZE); 7391bf215546Sopenharmony_ci memcpy(desc, src, len); 7392bf215546Sopenharmony_ci memset(desc + len, 0, VK_MAX_DESCRIPTION_SIZE - len); 7393bf215546Sopenharmony_ci} 7394bf215546Sopenharmony_ci 7395bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 7396bf215546Sopenharmony_ciradv_GetPipelineExecutablePropertiesKHR(VkDevice _device, const VkPipelineInfoKHR *pPipelineInfo, 7397bf215546Sopenharmony_ci uint32_t *pExecutableCount, 7398bf215546Sopenharmony_ci VkPipelineExecutablePropertiesKHR *pProperties) 7399bf215546Sopenharmony_ci{ 7400bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline, pipeline, pPipelineInfo->pipeline); 7401bf215546Sopenharmony_ci const uint32_t total_count = radv_get_executable_count(pipeline); 7402bf215546Sopenharmony_ci 7403bf215546Sopenharmony_ci if (!pProperties) { 7404bf215546Sopenharmony_ci *pExecutableCount = total_count; 7405bf215546Sopenharmony_ci return VK_SUCCESS; 7406bf215546Sopenharmony_ci } 7407bf215546Sopenharmony_ci 7408bf215546Sopenharmony_ci const uint32_t count = MIN2(total_count, *pExecutableCount); 7409bf215546Sopenharmony_ci for (unsigned i = 0, executable_idx = 0; i < MESA_VULKAN_SHADER_STAGES && executable_idx < count; ++i) { 7410bf215546Sopenharmony_ci if (!pipeline->shaders[i]) 7411bf215546Sopenharmony_ci continue; 7412bf215546Sopenharmony_ci pProperties[executable_idx].stages = mesa_to_vk_shader_stage(i); 7413bf215546Sopenharmony_ci const char *name = NULL; 7414bf215546Sopenharmony_ci const char *description = NULL; 7415bf215546Sopenharmony_ci switch (i) { 7416bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 7417bf215546Sopenharmony_ci name = "Vertex Shader"; 7418bf215546Sopenharmony_ci description = "Vulkan Vertex Shader"; 7419bf215546Sopenharmony_ci break; 
7420bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 7421bf215546Sopenharmony_ci if (!pipeline->shaders[MESA_SHADER_VERTEX]) { 7422bf215546Sopenharmony_ci pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT; 7423bf215546Sopenharmony_ci name = "Vertex + Tessellation Control Shaders"; 7424bf215546Sopenharmony_ci description = "Combined Vulkan Vertex and Tessellation Control Shaders"; 7425bf215546Sopenharmony_ci } else { 7426bf215546Sopenharmony_ci name = "Tessellation Control Shader"; 7427bf215546Sopenharmony_ci description = "Vulkan Tessellation Control Shader"; 7428bf215546Sopenharmony_ci } 7429bf215546Sopenharmony_ci break; 7430bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 7431bf215546Sopenharmony_ci name = "Tessellation Evaluation Shader"; 7432bf215546Sopenharmony_ci description = "Vulkan Tessellation Evaluation Shader"; 7433bf215546Sopenharmony_ci break; 7434bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 7435bf215546Sopenharmony_ci if (pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_TESS_EVAL]) { 7436bf215546Sopenharmony_ci pProperties[executable_idx].stages |= VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT; 7437bf215546Sopenharmony_ci name = "Tessellation Evaluation + Geometry Shaders"; 7438bf215546Sopenharmony_ci description = "Combined Vulkan Tessellation Evaluation and Geometry Shaders"; 7439bf215546Sopenharmony_ci } else if (!pipeline->shaders[MESA_SHADER_TESS_CTRL] && !pipeline->shaders[MESA_SHADER_VERTEX]) { 7440bf215546Sopenharmony_ci pProperties[executable_idx].stages |= VK_SHADER_STAGE_VERTEX_BIT; 7441bf215546Sopenharmony_ci name = "Vertex + Geometry Shader"; 7442bf215546Sopenharmony_ci description = "Combined Vulkan Vertex and Geometry Shaders"; 7443bf215546Sopenharmony_ci } else { 7444bf215546Sopenharmony_ci name = "Geometry Shader"; 7445bf215546Sopenharmony_ci description = "Vulkan Geometry Shader"; 7446bf215546Sopenharmony_ci } 7447bf215546Sopenharmony_ci break; 7448bf215546Sopenharmony_ci case 
MESA_SHADER_FRAGMENT: 7449bf215546Sopenharmony_ci name = "Fragment Shader"; 7450bf215546Sopenharmony_ci description = "Vulkan Fragment Shader"; 7451bf215546Sopenharmony_ci break; 7452bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 7453bf215546Sopenharmony_ci name = "Compute Shader"; 7454bf215546Sopenharmony_ci description = "Vulkan Compute Shader"; 7455bf215546Sopenharmony_ci break; 7456bf215546Sopenharmony_ci case MESA_SHADER_MESH: 7457bf215546Sopenharmony_ci name = "Mesh Shader"; 7458bf215546Sopenharmony_ci description = "Vulkan Mesh Shader"; 7459bf215546Sopenharmony_ci break; 7460bf215546Sopenharmony_ci case MESA_SHADER_TASK: 7461bf215546Sopenharmony_ci name = "Task Shader"; 7462bf215546Sopenharmony_ci description = "Vulkan Task Shader"; 7463bf215546Sopenharmony_ci break; 7464bf215546Sopenharmony_ci } 7465bf215546Sopenharmony_ci 7466bf215546Sopenharmony_ci pProperties[executable_idx].subgroupSize = pipeline->shaders[i]->info.wave_size; 7467bf215546Sopenharmony_ci desc_copy(pProperties[executable_idx].name, name); 7468bf215546Sopenharmony_ci desc_copy(pProperties[executable_idx].description, description); 7469bf215546Sopenharmony_ci 7470bf215546Sopenharmony_ci ++executable_idx; 7471bf215546Sopenharmony_ci if (i == MESA_SHADER_GEOMETRY && 7472bf215546Sopenharmony_ci !radv_pipeline_has_ngg(radv_pipeline_to_graphics(pipeline))) { 7473bf215546Sopenharmony_ci assert(pipeline->gs_copy_shader); 7474bf215546Sopenharmony_ci if (executable_idx >= count) 7475bf215546Sopenharmony_ci break; 7476bf215546Sopenharmony_ci 7477bf215546Sopenharmony_ci pProperties[executable_idx].stages = VK_SHADER_STAGE_GEOMETRY_BIT; 7478bf215546Sopenharmony_ci pProperties[executable_idx].subgroupSize = 64; 7479bf215546Sopenharmony_ci desc_copy(pProperties[executable_idx].name, "GS Copy Shader"); 7480bf215546Sopenharmony_ci desc_copy(pProperties[executable_idx].description, 7481bf215546Sopenharmony_ci "Extra shader stage that loads the GS output ringbuffer into the rasterizer"); 
7482bf215546Sopenharmony_ci 7483bf215546Sopenharmony_ci ++executable_idx; 7484bf215546Sopenharmony_ci } 7485bf215546Sopenharmony_ci } 7486bf215546Sopenharmony_ci 7487bf215546Sopenharmony_ci VkResult result = *pExecutableCount < total_count ? VK_INCOMPLETE : VK_SUCCESS; 7488bf215546Sopenharmony_ci *pExecutableCount = count; 7489bf215546Sopenharmony_ci return result; 7490bf215546Sopenharmony_ci} 7491bf215546Sopenharmony_ci 7492bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 7493bf215546Sopenharmony_ciradv_GetPipelineExecutableStatisticsKHR(VkDevice _device, 7494bf215546Sopenharmony_ci const VkPipelineExecutableInfoKHR *pExecutableInfo, 7495bf215546Sopenharmony_ci uint32_t *pStatisticCount, 7496bf215546Sopenharmony_ci VkPipelineExecutableStatisticKHR *pStatistics) 7497bf215546Sopenharmony_ci{ 7498bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 7499bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline); 7500bf215546Sopenharmony_ci gl_shader_stage stage; 7501bf215546Sopenharmony_ci struct radv_shader *shader = 7502bf215546Sopenharmony_ci radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage); 7503bf215546Sopenharmony_ci 7504bf215546Sopenharmony_ci const struct radv_physical_device *pdevice = device->physical_device; 7505bf215546Sopenharmony_ci 7506bf215546Sopenharmony_ci unsigned lds_increment = pdevice->rad_info.gfx_level >= GFX11 && stage == MESA_SHADER_FRAGMENT 7507bf215546Sopenharmony_ci ? 1024 : pdevice->rad_info.lds_encode_granularity; 7508bf215546Sopenharmony_ci unsigned max_waves = radv_get_max_waves(device, shader, stage); 7509bf215546Sopenharmony_ci 7510bf215546Sopenharmony_ci VkPipelineExecutableStatisticKHR *s = pStatistics; 7511bf215546Sopenharmony_ci VkPipelineExecutableStatisticKHR *end = s + (pStatistics ? 
*pStatisticCount : 0); 7512bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 7513bf215546Sopenharmony_ci 7514bf215546Sopenharmony_ci if (s < end) { 7515bf215546Sopenharmony_ci desc_copy(s->name, "Driver pipeline hash"); 7516bf215546Sopenharmony_ci desc_copy(s->description, "Driver pipeline hash used by RGP"); 7517bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7518bf215546Sopenharmony_ci s->value.u64 = pipeline->pipeline_hash; 7519bf215546Sopenharmony_ci } 7520bf215546Sopenharmony_ci ++s; 7521bf215546Sopenharmony_ci 7522bf215546Sopenharmony_ci if (s < end) { 7523bf215546Sopenharmony_ci desc_copy(s->name, "SGPRs"); 7524bf215546Sopenharmony_ci desc_copy(s->description, "Number of SGPR registers allocated per subgroup"); 7525bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7526bf215546Sopenharmony_ci s->value.u64 = shader->config.num_sgprs; 7527bf215546Sopenharmony_ci } 7528bf215546Sopenharmony_ci ++s; 7529bf215546Sopenharmony_ci 7530bf215546Sopenharmony_ci if (s < end) { 7531bf215546Sopenharmony_ci desc_copy(s->name, "VGPRs"); 7532bf215546Sopenharmony_ci desc_copy(s->description, "Number of VGPR registers allocated per subgroup"); 7533bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7534bf215546Sopenharmony_ci s->value.u64 = shader->config.num_vgprs; 7535bf215546Sopenharmony_ci } 7536bf215546Sopenharmony_ci ++s; 7537bf215546Sopenharmony_ci 7538bf215546Sopenharmony_ci if (s < end) { 7539bf215546Sopenharmony_ci desc_copy(s->name, "Spilled SGPRs"); 7540bf215546Sopenharmony_ci desc_copy(s->description, "Number of SGPR registers spilled per subgroup"); 7541bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7542bf215546Sopenharmony_ci s->value.u64 = shader->config.spilled_sgprs; 7543bf215546Sopenharmony_ci } 7544bf215546Sopenharmony_ci ++s; 7545bf215546Sopenharmony_ci 7546bf215546Sopenharmony_ci if (s < end) { 
7547bf215546Sopenharmony_ci desc_copy(s->name, "Spilled VGPRs"); 7548bf215546Sopenharmony_ci desc_copy(s->description, "Number of VGPR registers spilled per subgroup"); 7549bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7550bf215546Sopenharmony_ci s->value.u64 = shader->config.spilled_vgprs; 7551bf215546Sopenharmony_ci } 7552bf215546Sopenharmony_ci ++s; 7553bf215546Sopenharmony_ci 7554bf215546Sopenharmony_ci if (s < end) { 7555bf215546Sopenharmony_ci desc_copy(s->name, "Code size"); 7556bf215546Sopenharmony_ci desc_copy(s->description, "Code size in bytes"); 7557bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7558bf215546Sopenharmony_ci s->value.u64 = shader->exec_size; 7559bf215546Sopenharmony_ci } 7560bf215546Sopenharmony_ci ++s; 7561bf215546Sopenharmony_ci 7562bf215546Sopenharmony_ci if (s < end) { 7563bf215546Sopenharmony_ci desc_copy(s->name, "LDS size"); 7564bf215546Sopenharmony_ci desc_copy(s->description, "LDS size in bytes per workgroup"); 7565bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7566bf215546Sopenharmony_ci s->value.u64 = shader->config.lds_size * lds_increment; 7567bf215546Sopenharmony_ci } 7568bf215546Sopenharmony_ci ++s; 7569bf215546Sopenharmony_ci 7570bf215546Sopenharmony_ci if (s < end) { 7571bf215546Sopenharmony_ci desc_copy(s->name, "Scratch size"); 7572bf215546Sopenharmony_ci desc_copy(s->description, "Private memory in bytes per subgroup"); 7573bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7574bf215546Sopenharmony_ci s->value.u64 = shader->config.scratch_bytes_per_wave; 7575bf215546Sopenharmony_ci } 7576bf215546Sopenharmony_ci ++s; 7577bf215546Sopenharmony_ci 7578bf215546Sopenharmony_ci if (s < end) { 7579bf215546Sopenharmony_ci desc_copy(s->name, "Subgroups per SIMD"); 7580bf215546Sopenharmony_ci desc_copy(s->description, "The maximum number of subgroups in flight on a SIMD 
unit"); 7581bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7582bf215546Sopenharmony_ci s->value.u64 = max_waves; 7583bf215546Sopenharmony_ci } 7584bf215546Sopenharmony_ci ++s; 7585bf215546Sopenharmony_ci 7586bf215546Sopenharmony_ci if (shader->statistics) { 7587bf215546Sopenharmony_ci for (unsigned i = 0; i < aco_num_statistics; i++) { 7588bf215546Sopenharmony_ci const struct aco_compiler_statistic_info *info = &aco_statistic_infos[i]; 7589bf215546Sopenharmony_ci if (s < end) { 7590bf215546Sopenharmony_ci desc_copy(s->name, info->name); 7591bf215546Sopenharmony_ci desc_copy(s->description, info->desc); 7592bf215546Sopenharmony_ci s->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 7593bf215546Sopenharmony_ci s->value.u64 = shader->statistics[i]; 7594bf215546Sopenharmony_ci } 7595bf215546Sopenharmony_ci ++s; 7596bf215546Sopenharmony_ci } 7597bf215546Sopenharmony_ci } 7598bf215546Sopenharmony_ci 7599bf215546Sopenharmony_ci if (!pStatistics) 7600bf215546Sopenharmony_ci *pStatisticCount = s - pStatistics; 7601bf215546Sopenharmony_ci else if (s > end) { 7602bf215546Sopenharmony_ci *pStatisticCount = end - pStatistics; 7603bf215546Sopenharmony_ci result = VK_INCOMPLETE; 7604bf215546Sopenharmony_ci } else { 7605bf215546Sopenharmony_ci *pStatisticCount = s - pStatistics; 7606bf215546Sopenharmony_ci } 7607bf215546Sopenharmony_ci 7608bf215546Sopenharmony_ci return result; 7609bf215546Sopenharmony_ci} 7610bf215546Sopenharmony_ci 7611bf215546Sopenharmony_cistatic VkResult 7612bf215546Sopenharmony_ciradv_copy_representation(void *data, size_t *data_size, const char *src) 7613bf215546Sopenharmony_ci{ 7614bf215546Sopenharmony_ci size_t total_size = strlen(src) + 1; 7615bf215546Sopenharmony_ci 7616bf215546Sopenharmony_ci if (!data) { 7617bf215546Sopenharmony_ci *data_size = total_size; 7618bf215546Sopenharmony_ci return VK_SUCCESS; 7619bf215546Sopenharmony_ci } 7620bf215546Sopenharmony_ci 7621bf215546Sopenharmony_ci size_t size 
= MIN2(total_size, *data_size); 7622bf215546Sopenharmony_ci 7623bf215546Sopenharmony_ci memcpy(data, src, size); 7624bf215546Sopenharmony_ci if (size) 7625bf215546Sopenharmony_ci *((char *)data + size - 1) = 0; 7626bf215546Sopenharmony_ci return size < total_size ? VK_INCOMPLETE : VK_SUCCESS; 7627bf215546Sopenharmony_ci} 7628bf215546Sopenharmony_ci 7629bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 7630bf215546Sopenharmony_ciradv_GetPipelineExecutableInternalRepresentationsKHR( 7631bf215546Sopenharmony_ci VkDevice _device, const VkPipelineExecutableInfoKHR *pExecutableInfo, 7632bf215546Sopenharmony_ci uint32_t *pInternalRepresentationCount, 7633bf215546Sopenharmony_ci VkPipelineExecutableInternalRepresentationKHR *pInternalRepresentations) 7634bf215546Sopenharmony_ci{ 7635bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_device, device, _device); 7636bf215546Sopenharmony_ci RADV_FROM_HANDLE(radv_pipeline, pipeline, pExecutableInfo->pipeline); 7637bf215546Sopenharmony_ci gl_shader_stage stage; 7638bf215546Sopenharmony_ci struct radv_shader *shader = 7639bf215546Sopenharmony_ci radv_get_shader_from_executable_index(pipeline, pExecutableInfo->executableIndex, &stage); 7640bf215546Sopenharmony_ci 7641bf215546Sopenharmony_ci VkPipelineExecutableInternalRepresentationKHR *p = pInternalRepresentations; 7642bf215546Sopenharmony_ci VkPipelineExecutableInternalRepresentationKHR *end = 7643bf215546Sopenharmony_ci p + (pInternalRepresentations ? 
*pInternalRepresentationCount : 0); 7644bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 7645bf215546Sopenharmony_ci /* optimized NIR */ 7646bf215546Sopenharmony_ci if (p < end) { 7647bf215546Sopenharmony_ci p->isText = true; 7648bf215546Sopenharmony_ci desc_copy(p->name, "NIR Shader(s)"); 7649bf215546Sopenharmony_ci desc_copy(p->description, "The optimized NIR shader(s)"); 7650bf215546Sopenharmony_ci if (radv_copy_representation(p->pData, &p->dataSize, shader->nir_string) != VK_SUCCESS) 7651bf215546Sopenharmony_ci result = VK_INCOMPLETE; 7652bf215546Sopenharmony_ci } 7653bf215546Sopenharmony_ci ++p; 7654bf215546Sopenharmony_ci 7655bf215546Sopenharmony_ci /* backend IR */ 7656bf215546Sopenharmony_ci if (p < end) { 7657bf215546Sopenharmony_ci p->isText = true; 7658bf215546Sopenharmony_ci if (radv_use_llvm_for_stage(device, stage)) { 7659bf215546Sopenharmony_ci desc_copy(p->name, "LLVM IR"); 7660bf215546Sopenharmony_ci desc_copy(p->description, "The LLVM IR after some optimizations"); 7661bf215546Sopenharmony_ci } else { 7662bf215546Sopenharmony_ci desc_copy(p->name, "ACO IR"); 7663bf215546Sopenharmony_ci desc_copy(p->description, "The ACO IR after some optimizations"); 7664bf215546Sopenharmony_ci } 7665bf215546Sopenharmony_ci if (radv_copy_representation(p->pData, &p->dataSize, shader->ir_string) != VK_SUCCESS) 7666bf215546Sopenharmony_ci result = VK_INCOMPLETE; 7667bf215546Sopenharmony_ci } 7668bf215546Sopenharmony_ci ++p; 7669bf215546Sopenharmony_ci 7670bf215546Sopenharmony_ci /* Disassembler */ 7671bf215546Sopenharmony_ci if (p < end && shader->disasm_string) { 7672bf215546Sopenharmony_ci p->isText = true; 7673bf215546Sopenharmony_ci desc_copy(p->name, "Assembly"); 7674bf215546Sopenharmony_ci desc_copy(p->description, "Final Assembly"); 7675bf215546Sopenharmony_ci if (radv_copy_representation(p->pData, &p->dataSize, shader->disasm_string) != VK_SUCCESS) 7676bf215546Sopenharmony_ci result = VK_INCOMPLETE; 7677bf215546Sopenharmony_ci } 
7678bf215546Sopenharmony_ci ++p; 7679bf215546Sopenharmony_ci 7680bf215546Sopenharmony_ci if (!pInternalRepresentations) 7681bf215546Sopenharmony_ci *pInternalRepresentationCount = p - pInternalRepresentations; 7682bf215546Sopenharmony_ci else if (p > end) { 7683bf215546Sopenharmony_ci result = VK_INCOMPLETE; 7684bf215546Sopenharmony_ci *pInternalRepresentationCount = end - pInternalRepresentations; 7685bf215546Sopenharmony_ci } else { 7686bf215546Sopenharmony_ci *pInternalRepresentationCount = p - pInternalRepresentations; 7687bf215546Sopenharmony_ci } 7688bf215546Sopenharmony_ci 7689bf215546Sopenharmony_ci return result; 7690bf215546Sopenharmony_ci} 7691