1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2016 Red Hat. 3bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen 4bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * based in part on anv driver which is: 7bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 8bf215546Sopenharmony_ci */ 9bf215546Sopenharmony_ci 10bf215546Sopenharmony_ci#include "tu_pipeline.h" 11bf215546Sopenharmony_ci 12bf215546Sopenharmony_ci#include "common/freedreno_guardband.h" 13bf215546Sopenharmony_ci 14bf215546Sopenharmony_ci#include "ir3/ir3_nir.h" 15bf215546Sopenharmony_ci#include "main/menums.h" 16bf215546Sopenharmony_ci#include "nir/nir.h" 17bf215546Sopenharmony_ci#include "nir/nir_builder.h" 18bf215546Sopenharmony_ci#include "spirv/nir_spirv.h" 19bf215546Sopenharmony_ci#include "util/debug.h" 20bf215546Sopenharmony_ci#include "util/mesa-sha1.h" 21bf215546Sopenharmony_ci#include "vk_pipeline.h" 22bf215546Sopenharmony_ci#include "vk_render_pass.h" 23bf215546Sopenharmony_ci#include "vk_util.h" 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "tu_cmd_buffer.h" 26bf215546Sopenharmony_ci#include "tu_cs.h" 27bf215546Sopenharmony_ci#include "tu_device.h" 28bf215546Sopenharmony_ci#include "tu_formats.h" 29bf215546Sopenharmony_ci#include "tu_lrz.h" 30bf215546Sopenharmony_ci#include "tu_pass.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci/* Emit IB that preloads the descriptors that the shader uses */ 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_cistatic void 35bf215546Sopenharmony_ciemit_load_state(struct tu_cs *cs, unsigned opcode, enum a6xx_state_type st, 36bf215546Sopenharmony_ci enum a6xx_state_block sb, unsigned base, unsigned offset, 37bf215546Sopenharmony_ci unsigned count) 38bf215546Sopenharmony_ci{ 39bf215546Sopenharmony_ci /* Note: just emit one packet, even if count overflows NUM_UNIT. 
It's not 40bf215546Sopenharmony_ci * clear if emitting more packets will even help anything. Presumably the 41bf215546Sopenharmony_ci * descriptor cache is relatively small, and these packets stop doing 42bf215546Sopenharmony_ci * anything when there are too many descriptors. 43bf215546Sopenharmony_ci */ 44bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, opcode, 3); 45bf215546Sopenharmony_ci tu_cs_emit(cs, 46bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(st) | 47bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_BINDLESS) | 48bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(sb) | 49bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(MIN2(count, 1024-1))); 50bf215546Sopenharmony_ci tu_cs_emit_qw(cs, offset | (base << 28)); 51bf215546Sopenharmony_ci} 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_cistatic unsigned 54bf215546Sopenharmony_citu6_load_state_size(struct tu_pipeline *pipeline, 55bf215546Sopenharmony_ci struct tu_pipeline_layout *layout, bool compute) 56bf215546Sopenharmony_ci{ 57bf215546Sopenharmony_ci const unsigned load_state_size = 4; 58bf215546Sopenharmony_ci unsigned size = 0; 59bf215546Sopenharmony_ci for (unsigned i = 0; i < layout->num_sets; i++) { 60bf215546Sopenharmony_ci if (!(pipeline->active_desc_sets & (1u << i))) 61bf215546Sopenharmony_ci continue; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci struct tu_descriptor_set_layout *set_layout = layout->set[i].layout; 64bf215546Sopenharmony_ci for (unsigned j = 0; j < set_layout->binding_count; j++) { 65bf215546Sopenharmony_ci struct tu_descriptor_set_binding_layout *binding = &set_layout->binding[j]; 66bf215546Sopenharmony_ci unsigned count = 0; 67bf215546Sopenharmony_ci /* Note: some users, like amber for example, pass in 68bf215546Sopenharmony_ci * VK_SHADER_STAGE_ALL which includes a bunch of extra bits, so 69bf215546Sopenharmony_ci * filter these out by using VK_SHADER_STAGE_ALL_GRAPHICS explicitly. 
70bf215546Sopenharmony_ci */ 71bf215546Sopenharmony_ci VkShaderStageFlags stages = compute ? 72bf215546Sopenharmony_ci binding->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT : 73bf215546Sopenharmony_ci binding->shader_stages & VK_SHADER_STAGE_ALL_GRAPHICS; 74bf215546Sopenharmony_ci unsigned stage_count = util_bitcount(stages); 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci if (!binding->array_size) 77bf215546Sopenharmony_ci continue; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci switch (binding->type) { 80bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 81bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 82bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 83bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 84bf215546Sopenharmony_ci /* IBO-backed resources only need one packet for all graphics stages */ 85bf215546Sopenharmony_ci if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) 86bf215546Sopenharmony_ci count += 1; 87bf215546Sopenharmony_ci if (stages & VK_SHADER_STAGE_COMPUTE_BIT) 88bf215546Sopenharmony_ci count += 1; 89bf215546Sopenharmony_ci break; 90bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_SAMPLER: 91bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: 92bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 93bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 94bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 95bf215546Sopenharmony_ci /* Textures and UBO's needs a packet for each stage */ 96bf215546Sopenharmony_ci count = stage_count; 97bf215546Sopenharmony_ci break; 98bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 99bf215546Sopenharmony_ci /* Because of how we pack combined images and samplers, we 100bf215546Sopenharmony_ci * currently can't use one packet for the whole array. 
101bf215546Sopenharmony_ci */ 102bf215546Sopenharmony_ci count = stage_count * binding->array_size * 2; 103bf215546Sopenharmony_ci break; 104bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: 105bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: 106bf215546Sopenharmony_ci break; 107bf215546Sopenharmony_ci default: 108bf215546Sopenharmony_ci unreachable("bad descriptor type"); 109bf215546Sopenharmony_ci } 110bf215546Sopenharmony_ci size += count * load_state_size; 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci } 113bf215546Sopenharmony_ci return size; 114bf215546Sopenharmony_ci} 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_cistatic void 117bf215546Sopenharmony_citu6_emit_load_state(struct tu_pipeline *pipeline, 118bf215546Sopenharmony_ci struct tu_pipeline_layout *layout, bool compute) 119bf215546Sopenharmony_ci{ 120bf215546Sopenharmony_ci unsigned size = tu6_load_state_size(pipeline, layout, compute); 121bf215546Sopenharmony_ci if (size == 0) 122bf215546Sopenharmony_ci return; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci struct tu_cs cs; 125bf215546Sopenharmony_ci tu_cs_begin_sub_stream(&pipeline->cs, size, &cs); 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci for (unsigned i = 0; i < layout->num_sets; i++) { 128bf215546Sopenharmony_ci /* From 13.2.7. Descriptor Set Binding: 129bf215546Sopenharmony_ci * 130bf215546Sopenharmony_ci * A compatible descriptor set must be bound for all set numbers that 131bf215546Sopenharmony_ci * any shaders in a pipeline access, at the time that a draw or 132bf215546Sopenharmony_ci * dispatch command is recorded to execute using that pipeline. 
133bf215546Sopenharmony_ci * However, if none of the shaders in a pipeline statically use any 134bf215546Sopenharmony_ci * bindings with a particular set number, then no descriptor set need 135bf215546Sopenharmony_ci * be bound for that set number, even if the pipeline layout includes 136bf215546Sopenharmony_ci * a non-trivial descriptor set layout for that set number. 137bf215546Sopenharmony_ci * 138bf215546Sopenharmony_ci * This means that descriptor sets unused by the pipeline may have a 139bf215546Sopenharmony_ci * garbage or 0 BINDLESS_BASE register, which will cause context faults 140bf215546Sopenharmony_ci * when prefetching descriptors from these sets. Skip prefetching for 141bf215546Sopenharmony_ci * descriptors from them to avoid this. This is also an optimization, 142bf215546Sopenharmony_ci * since these prefetches would be useless. 143bf215546Sopenharmony_ci */ 144bf215546Sopenharmony_ci if (!(pipeline->active_desc_sets & (1u << i))) 145bf215546Sopenharmony_ci continue; 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_ci struct tu_descriptor_set_layout *set_layout = layout->set[i].layout; 148bf215546Sopenharmony_ci for (unsigned j = 0; j < set_layout->binding_count; j++) { 149bf215546Sopenharmony_ci struct tu_descriptor_set_binding_layout *binding = &set_layout->binding[j]; 150bf215546Sopenharmony_ci unsigned base = i; 151bf215546Sopenharmony_ci unsigned offset = binding->offset / 4; 152bf215546Sopenharmony_ci /* Note: some users, like amber for example, pass in 153bf215546Sopenharmony_ci * VK_SHADER_STAGE_ALL which includes a bunch of extra bits, so 154bf215546Sopenharmony_ci * filter these out by using VK_SHADER_STAGE_ALL_GRAPHICS explicitly. 155bf215546Sopenharmony_ci */ 156bf215546Sopenharmony_ci VkShaderStageFlags stages = compute ? 
157bf215546Sopenharmony_ci binding->shader_stages & VK_SHADER_STAGE_COMPUTE_BIT : 158bf215546Sopenharmony_ci binding->shader_stages & VK_SHADER_STAGE_ALL_GRAPHICS; 159bf215546Sopenharmony_ci unsigned count = binding->array_size; 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci /* If this is a variable-count descriptor, then the array_size is an 162bf215546Sopenharmony_ci * upper bound on the size, but we don't know how many descriptors 163bf215546Sopenharmony_ci * will actually be used. Therefore we can't pre-load them here. 164bf215546Sopenharmony_ci */ 165bf215546Sopenharmony_ci if (j == set_layout->binding_count - 1 && 166bf215546Sopenharmony_ci set_layout->has_variable_descriptors) 167bf215546Sopenharmony_ci continue; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci if (count == 0 || stages == 0) 170bf215546Sopenharmony_ci continue; 171bf215546Sopenharmony_ci switch (binding->type) { 172bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 173bf215546Sopenharmony_ci base = MAX_SETS; 174bf215546Sopenharmony_ci offset = (layout->set[i].dynamic_offset_start + 175bf215546Sopenharmony_ci binding->dynamic_offset_offset) / 4; 176bf215546Sopenharmony_ci FALLTHROUGH; 177bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 178bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 179bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: { 180bf215546Sopenharmony_ci unsigned mul = binding->size / (A6XX_TEX_CONST_DWORDS * 4); 181bf215546Sopenharmony_ci /* IBO-backed resources only need one packet for all graphics stages */ 182bf215546Sopenharmony_ci if (stages & ~VK_SHADER_STAGE_COMPUTE_BIT) { 183bf215546Sopenharmony_ci emit_load_state(&cs, CP_LOAD_STATE6, ST6_SHADER, SB6_IBO, 184bf215546Sopenharmony_ci base, offset, count * mul); 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci if (stages & VK_SHADER_STAGE_COMPUTE_BIT) { 187bf215546Sopenharmony_ci emit_load_state(&cs, CP_LOAD_STATE6_FRAG, ST6_IBO, 
SB6_CS_SHADER, 188bf215546Sopenharmony_ci base, offset, count * mul); 189bf215546Sopenharmony_ci } 190bf215546Sopenharmony_ci break; 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: 193bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: 194bf215546Sopenharmony_ci /* nothing - input attachment doesn't use bindless */ 195bf215546Sopenharmony_ci break; 196bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_SAMPLER: 197bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: 198bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: { 199bf215546Sopenharmony_ci tu_foreach_stage(stage, stages) { 200bf215546Sopenharmony_ci emit_load_state(&cs, tu6_stage2opcode(stage), 201bf215546Sopenharmony_ci binding->type == VK_DESCRIPTOR_TYPE_SAMPLER ? 202bf215546Sopenharmony_ci ST6_SHADER : ST6_CONSTANTS, 203bf215546Sopenharmony_ci tu6_stage2texsb(stage), base, offset, count); 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci break; 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 208bf215546Sopenharmony_ci base = MAX_SETS; 209bf215546Sopenharmony_ci offset = (layout->set[i].dynamic_offset_start + 210bf215546Sopenharmony_ci binding->dynamic_offset_offset) / 4; 211bf215546Sopenharmony_ci FALLTHROUGH; 212bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { 213bf215546Sopenharmony_ci tu_foreach_stage(stage, stages) { 214bf215546Sopenharmony_ci emit_load_state(&cs, tu6_stage2opcode(stage), ST6_UBO, 215bf215546Sopenharmony_ci tu6_stage2shadersb(stage), base, offset, count); 216bf215546Sopenharmony_ci } 217bf215546Sopenharmony_ci break; 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: { 220bf215546Sopenharmony_ci tu_foreach_stage(stage, stages) { 221bf215546Sopenharmony_ci /* TODO: We could emit less CP_LOAD_STATE6 if we used 222bf215546Sopenharmony_ci * struct-of-arrays instead of 
array-of-structs. 223bf215546Sopenharmony_ci */ 224bf215546Sopenharmony_ci for (unsigned i = 0; i < count; i++) { 225bf215546Sopenharmony_ci unsigned tex_offset = offset + 2 * i * A6XX_TEX_CONST_DWORDS; 226bf215546Sopenharmony_ci unsigned sam_offset = offset + (2 * i + 1) * A6XX_TEX_CONST_DWORDS; 227bf215546Sopenharmony_ci emit_load_state(&cs, tu6_stage2opcode(stage), 228bf215546Sopenharmony_ci ST6_CONSTANTS, tu6_stage2texsb(stage), 229bf215546Sopenharmony_ci base, tex_offset, 1); 230bf215546Sopenharmony_ci emit_load_state(&cs, tu6_stage2opcode(stage), 231bf215546Sopenharmony_ci ST6_SHADER, tu6_stage2texsb(stage), 232bf215546Sopenharmony_ci base, sam_offset, 1); 233bf215546Sopenharmony_ci } 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci break; 236bf215546Sopenharmony_ci } 237bf215546Sopenharmony_ci default: 238bf215546Sopenharmony_ci unreachable("bad descriptor type"); 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci } 241bf215546Sopenharmony_ci } 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci pipeline->load_state = tu_cs_end_draw_state(&pipeline->cs, &cs); 244bf215546Sopenharmony_ci} 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_cistruct tu_pipeline_builder 247bf215546Sopenharmony_ci{ 248bf215546Sopenharmony_ci struct tu_device *device; 249bf215546Sopenharmony_ci void *mem_ctx; 250bf215546Sopenharmony_ci struct vk_pipeline_cache *cache; 251bf215546Sopenharmony_ci struct tu_pipeline_layout *layout; 252bf215546Sopenharmony_ci const VkAllocationCallbacks *alloc; 253bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *create_info; 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci struct tu_compiled_shaders *shaders; 256bf215546Sopenharmony_ci struct ir3_shader_variant *binning_variant; 257bf215546Sopenharmony_ci uint64_t shader_iova[MESA_SHADER_FRAGMENT + 1]; 258bf215546Sopenharmony_ci uint64_t binning_vs_iova; 259bf215546Sopenharmony_ci 260bf215546Sopenharmony_ci uint32_t additional_cs_reserve_size; 261bf215546Sopenharmony_ci 
262bf215546Sopenharmony_ci struct tu_pvtmem_config pvtmem; 263bf215546Sopenharmony_ci 264bf215546Sopenharmony_ci bool rasterizer_discard; 265bf215546Sopenharmony_ci /* these states are affectd by rasterizer_discard */ 266bf215546Sopenharmony_ci bool emit_msaa_state; 267bf215546Sopenharmony_ci bool depth_clip_disable; 268bf215546Sopenharmony_ci VkSampleCountFlagBits samples; 269bf215546Sopenharmony_ci bool use_color_attachments; 270bf215546Sopenharmony_ci bool use_dual_src_blend; 271bf215546Sopenharmony_ci bool alpha_to_coverage; 272bf215546Sopenharmony_ci uint32_t color_attachment_count; 273bf215546Sopenharmony_ci VkFormat color_attachment_formats[MAX_RTS]; 274bf215546Sopenharmony_ci VkFormat depth_attachment_format; 275bf215546Sopenharmony_ci uint32_t render_components; 276bf215546Sopenharmony_ci uint32_t multiview_mask; 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_ci bool subpass_raster_order_attachment_access; 279bf215546Sopenharmony_ci bool subpass_feedback_loop_color; 280bf215546Sopenharmony_ci bool subpass_feedback_loop_ds; 281bf215546Sopenharmony_ci}; 282bf215546Sopenharmony_ci 283bf215546Sopenharmony_cistatic bool 284bf215546Sopenharmony_citu_logic_op_reads_dst(VkLogicOp op) 285bf215546Sopenharmony_ci{ 286bf215546Sopenharmony_ci switch (op) { 287bf215546Sopenharmony_ci case VK_LOGIC_OP_CLEAR: 288bf215546Sopenharmony_ci case VK_LOGIC_OP_COPY: 289bf215546Sopenharmony_ci case VK_LOGIC_OP_COPY_INVERTED: 290bf215546Sopenharmony_ci case VK_LOGIC_OP_SET: 291bf215546Sopenharmony_ci return false; 292bf215546Sopenharmony_ci default: 293bf215546Sopenharmony_ci return true; 294bf215546Sopenharmony_ci } 295bf215546Sopenharmony_ci} 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_cistatic VkBlendFactor 298bf215546Sopenharmony_citu_blend_factor_no_dst_alpha(VkBlendFactor factor) 299bf215546Sopenharmony_ci{ 300bf215546Sopenharmony_ci /* treat dst alpha as 1.0 and avoid reading it */ 301bf215546Sopenharmony_ci switch (factor) { 302bf215546Sopenharmony_ci case 
VK_BLEND_FACTOR_DST_ALPHA: 303bf215546Sopenharmony_ci return VK_BLEND_FACTOR_ONE; 304bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA: 305bf215546Sopenharmony_ci return VK_BLEND_FACTOR_ZERO; 306bf215546Sopenharmony_ci default: 307bf215546Sopenharmony_ci return factor; 308bf215546Sopenharmony_ci } 309bf215546Sopenharmony_ci} 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_cistatic bool tu_blend_factor_is_dual_src(VkBlendFactor factor) 312bf215546Sopenharmony_ci{ 313bf215546Sopenharmony_ci switch (factor) { 314bf215546Sopenharmony_ci case VK_BLEND_FACTOR_SRC1_COLOR: 315bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR: 316bf215546Sopenharmony_ci case VK_BLEND_FACTOR_SRC1_ALPHA: 317bf215546Sopenharmony_ci case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA: 318bf215546Sopenharmony_ci return true; 319bf215546Sopenharmony_ci default: 320bf215546Sopenharmony_ci return false; 321bf215546Sopenharmony_ci } 322bf215546Sopenharmony_ci} 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_cistatic bool 325bf215546Sopenharmony_citu_blend_state_is_dual_src(const VkPipelineColorBlendStateCreateInfo *info) 326bf215546Sopenharmony_ci{ 327bf215546Sopenharmony_ci if (!info) 328bf215546Sopenharmony_ci return false; 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci for (unsigned i = 0; i < info->attachmentCount; i++) { 331bf215546Sopenharmony_ci const VkPipelineColorBlendAttachmentState *blend = &info->pAttachments[i]; 332bf215546Sopenharmony_ci if (tu_blend_factor_is_dual_src(blend->srcColorBlendFactor) || 333bf215546Sopenharmony_ci tu_blend_factor_is_dual_src(blend->dstColorBlendFactor) || 334bf215546Sopenharmony_ci tu_blend_factor_is_dual_src(blend->srcAlphaBlendFactor) || 335bf215546Sopenharmony_ci tu_blend_factor_is_dual_src(blend->dstAlphaBlendFactor)) 336bf215546Sopenharmony_ci return true; 337bf215546Sopenharmony_ci } 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci return false; 340bf215546Sopenharmony_ci} 341bf215546Sopenharmony_ci 
342bf215546Sopenharmony_cistatic const struct xs_config { 343bf215546Sopenharmony_ci uint16_t reg_sp_xs_ctrl; 344bf215546Sopenharmony_ci uint16_t reg_sp_xs_config; 345bf215546Sopenharmony_ci uint16_t reg_sp_xs_instrlen; 346bf215546Sopenharmony_ci uint16_t reg_hlsq_xs_ctrl; 347bf215546Sopenharmony_ci uint16_t reg_sp_xs_first_exec_offset; 348bf215546Sopenharmony_ci uint16_t reg_sp_xs_pvt_mem_hw_stack_offset; 349bf215546Sopenharmony_ci} xs_config[] = { 350bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = { 351bf215546Sopenharmony_ci REG_A6XX_SP_VS_CTRL_REG0, 352bf215546Sopenharmony_ci REG_A6XX_SP_VS_CONFIG, 353bf215546Sopenharmony_ci REG_A6XX_SP_VS_INSTRLEN, 354bf215546Sopenharmony_ci REG_A6XX_HLSQ_VS_CNTL, 355bf215546Sopenharmony_ci REG_A6XX_SP_VS_OBJ_FIRST_EXEC_OFFSET, 356bf215546Sopenharmony_ci REG_A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET, 357bf215546Sopenharmony_ci }, 358bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = { 359bf215546Sopenharmony_ci REG_A6XX_SP_HS_CTRL_REG0, 360bf215546Sopenharmony_ci REG_A6XX_SP_HS_CONFIG, 361bf215546Sopenharmony_ci REG_A6XX_SP_HS_INSTRLEN, 362bf215546Sopenharmony_ci REG_A6XX_HLSQ_HS_CNTL, 363bf215546Sopenharmony_ci REG_A6XX_SP_HS_OBJ_FIRST_EXEC_OFFSET, 364bf215546Sopenharmony_ci REG_A6XX_SP_HS_PVT_MEM_HW_STACK_OFFSET, 365bf215546Sopenharmony_ci }, 366bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = { 367bf215546Sopenharmony_ci REG_A6XX_SP_DS_CTRL_REG0, 368bf215546Sopenharmony_ci REG_A6XX_SP_DS_CONFIG, 369bf215546Sopenharmony_ci REG_A6XX_SP_DS_INSTRLEN, 370bf215546Sopenharmony_ci REG_A6XX_HLSQ_DS_CNTL, 371bf215546Sopenharmony_ci REG_A6XX_SP_DS_OBJ_FIRST_EXEC_OFFSET, 372bf215546Sopenharmony_ci REG_A6XX_SP_DS_PVT_MEM_HW_STACK_OFFSET, 373bf215546Sopenharmony_ci }, 374bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = { 375bf215546Sopenharmony_ci REG_A6XX_SP_GS_CTRL_REG0, 376bf215546Sopenharmony_ci REG_A6XX_SP_GS_CONFIG, 377bf215546Sopenharmony_ci REG_A6XX_SP_GS_INSTRLEN, 378bf215546Sopenharmony_ci REG_A6XX_HLSQ_GS_CNTL, 
379bf215546Sopenharmony_ci REG_A6XX_SP_GS_OBJ_FIRST_EXEC_OFFSET, 380bf215546Sopenharmony_ci REG_A6XX_SP_GS_PVT_MEM_HW_STACK_OFFSET, 381bf215546Sopenharmony_ci }, 382bf215546Sopenharmony_ci [MESA_SHADER_FRAGMENT] = { 383bf215546Sopenharmony_ci REG_A6XX_SP_FS_CTRL_REG0, 384bf215546Sopenharmony_ci REG_A6XX_SP_FS_CONFIG, 385bf215546Sopenharmony_ci REG_A6XX_SP_FS_INSTRLEN, 386bf215546Sopenharmony_ci REG_A6XX_HLSQ_FS_CNTL, 387bf215546Sopenharmony_ci REG_A6XX_SP_FS_OBJ_FIRST_EXEC_OFFSET, 388bf215546Sopenharmony_ci REG_A6XX_SP_FS_PVT_MEM_HW_STACK_OFFSET, 389bf215546Sopenharmony_ci }, 390bf215546Sopenharmony_ci [MESA_SHADER_COMPUTE] = { 391bf215546Sopenharmony_ci REG_A6XX_SP_CS_CTRL_REG0, 392bf215546Sopenharmony_ci REG_A6XX_SP_CS_CONFIG, 393bf215546Sopenharmony_ci REG_A6XX_SP_CS_INSTRLEN, 394bf215546Sopenharmony_ci REG_A6XX_HLSQ_CS_CNTL, 395bf215546Sopenharmony_ci REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 396bf215546Sopenharmony_ci REG_A6XX_SP_CS_PVT_MEM_HW_STACK_OFFSET, 397bf215546Sopenharmony_ci }, 398bf215546Sopenharmony_ci}; 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_cistatic uint32_t 401bf215546Sopenharmony_citu_xs_get_immediates_packet_size_dwords(const struct ir3_shader_variant *xs) 402bf215546Sopenharmony_ci{ 403bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(xs); 404bf215546Sopenharmony_ci uint32_t base = const_state->offsets.immediate; 405bf215546Sopenharmony_ci int32_t size = DIV_ROUND_UP(const_state->immediates_count, 4); 406bf215546Sopenharmony_ci 407bf215546Sopenharmony_ci /* truncate size to avoid writing constants that shader 408bf215546Sopenharmony_ci * does not use: 409bf215546Sopenharmony_ci */ 410bf215546Sopenharmony_ci size = MIN2(size + base, xs->constlen) - base; 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci return MAX2(size, 0) * 4; 413bf215546Sopenharmony_ci} 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci/* We allocate fixed-length substreams for shader state, however some 
416bf215546Sopenharmony_ci * parts of the state may have unbound length. Their additional space 417bf215546Sopenharmony_ci * requirements should be calculated here. 418bf215546Sopenharmony_ci */ 419bf215546Sopenharmony_cistatic uint32_t 420bf215546Sopenharmony_citu_xs_get_additional_cs_size_dwords(const struct ir3_shader_variant *xs) 421bf215546Sopenharmony_ci{ 422bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(xs); 423bf215546Sopenharmony_ci 424bf215546Sopenharmony_ci uint32_t size = tu_xs_get_immediates_packet_size_dwords(xs); 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_ci /* Variable number of UBO upload ranges. */ 427bf215546Sopenharmony_ci size += 4 * const_state->ubo_state.num_enabled; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci /* Variable number of dwords for the primitive map */ 430bf215546Sopenharmony_ci size += xs->input_size; 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci size += xs->constant_data_size / 4; 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_ci return size; 435bf215546Sopenharmony_ci} 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_civoid 438bf215546Sopenharmony_citu6_emit_xs_config(struct tu_cs *cs, 439bf215546Sopenharmony_ci gl_shader_stage stage, /* xs->type, but xs may be NULL */ 440bf215546Sopenharmony_ci const struct ir3_shader_variant *xs) 441bf215546Sopenharmony_ci{ 442bf215546Sopenharmony_ci const struct xs_config *cfg = &xs_config[stage]; 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci if (!xs) { 445bf215546Sopenharmony_ci /* shader stage disabled */ 446bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_config, 1); 447bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1); 450bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 451bf215546Sopenharmony_ci return; 452bf215546Sopenharmony_ci } 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, 
cfg->reg_sp_xs_config, 1); 455bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_VS_CONFIG_ENABLED | 456bf215546Sopenharmony_ci COND(xs->bindless_tex, A6XX_SP_VS_CONFIG_BINDLESS_TEX) | 457bf215546Sopenharmony_ci COND(xs->bindless_samp, A6XX_SP_VS_CONFIG_BINDLESS_SAMP) | 458bf215546Sopenharmony_ci COND(xs->bindless_ibo, A6XX_SP_VS_CONFIG_BINDLESS_IBO) | 459bf215546Sopenharmony_ci COND(xs->bindless_ubo, A6XX_SP_VS_CONFIG_BINDLESS_UBO) | 460bf215546Sopenharmony_ci A6XX_SP_VS_CONFIG_NTEX(xs->num_samp) | 461bf215546Sopenharmony_ci A6XX_SP_VS_CONFIG_NSAMP(xs->num_samp)); 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_hlsq_xs_ctrl, 1); 464bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_HLSQ_VS_CNTL_CONSTLEN(xs->constlen) | 465bf215546Sopenharmony_ci A6XX_HLSQ_VS_CNTL_ENABLED); 466bf215546Sopenharmony_ci} 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_civoid 469bf215546Sopenharmony_citu6_emit_xs(struct tu_cs *cs, 470bf215546Sopenharmony_ci gl_shader_stage stage, /* xs->type, but xs may be NULL */ 471bf215546Sopenharmony_ci const struct ir3_shader_variant *xs, 472bf215546Sopenharmony_ci const struct tu_pvtmem_config *pvtmem, 473bf215546Sopenharmony_ci uint64_t binary_iova) 474bf215546Sopenharmony_ci{ 475bf215546Sopenharmony_ci const struct xs_config *cfg = &xs_config[stage]; 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_ci if (!xs) { 478bf215546Sopenharmony_ci /* shader stage disabled */ 479bf215546Sopenharmony_ci return; 480bf215546Sopenharmony_ci } 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci enum a6xx_threadsize thrsz = 483bf215546Sopenharmony_ci xs->info.double_threadsize ? 
THREAD128 : THREAD64; 484bf215546Sopenharmony_ci switch (stage) { 485bf215546Sopenharmony_ci case MESA_SHADER_VERTEX: 486bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_VS_CTRL_REG0( 487bf215546Sopenharmony_ci .fullregfootprint = xs->info.max_reg + 1, 488bf215546Sopenharmony_ci .halfregfootprint = xs->info.max_half_reg + 1, 489bf215546Sopenharmony_ci .branchstack = ir3_shader_branchstack_hw(xs), 490bf215546Sopenharmony_ci .mergedregs = xs->mergedregs, 491bf215546Sopenharmony_ci )); 492bf215546Sopenharmony_ci break; 493bf215546Sopenharmony_ci case MESA_SHADER_TESS_CTRL: 494bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_HS_CTRL_REG0( 495bf215546Sopenharmony_ci .fullregfootprint = xs->info.max_reg + 1, 496bf215546Sopenharmony_ci .halfregfootprint = xs->info.max_half_reg + 1, 497bf215546Sopenharmony_ci .branchstack = ir3_shader_branchstack_hw(xs), 498bf215546Sopenharmony_ci )); 499bf215546Sopenharmony_ci break; 500bf215546Sopenharmony_ci case MESA_SHADER_TESS_EVAL: 501bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_DS_CTRL_REG0( 502bf215546Sopenharmony_ci .fullregfootprint = xs->info.max_reg + 1, 503bf215546Sopenharmony_ci .halfregfootprint = xs->info.max_half_reg + 1, 504bf215546Sopenharmony_ci .branchstack = ir3_shader_branchstack_hw(xs), 505bf215546Sopenharmony_ci )); 506bf215546Sopenharmony_ci break; 507bf215546Sopenharmony_ci case MESA_SHADER_GEOMETRY: 508bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_GS_CTRL_REG0( 509bf215546Sopenharmony_ci .fullregfootprint = xs->info.max_reg + 1, 510bf215546Sopenharmony_ci .halfregfootprint = xs->info.max_half_reg + 1, 511bf215546Sopenharmony_ci .branchstack = ir3_shader_branchstack_hw(xs), 512bf215546Sopenharmony_ci )); 513bf215546Sopenharmony_ci break; 514bf215546Sopenharmony_ci case MESA_SHADER_FRAGMENT: 515bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_FS_CTRL_REG0( 516bf215546Sopenharmony_ci .fullregfootprint = xs->info.max_reg + 1, 517bf215546Sopenharmony_ci .halfregfootprint = xs->info.max_half_reg 
+ 1, 518bf215546Sopenharmony_ci .branchstack = ir3_shader_branchstack_hw(xs), 519bf215546Sopenharmony_ci .mergedregs = xs->mergedregs, 520bf215546Sopenharmony_ci .threadsize = thrsz, 521bf215546Sopenharmony_ci .pixlodenable = xs->need_pixlod, 522bf215546Sopenharmony_ci .diff_fine = xs->need_fine_derivatives, 523bf215546Sopenharmony_ci .varying = xs->total_in != 0, 524bf215546Sopenharmony_ci /* unknown bit, seems unnecessary */ 525bf215546Sopenharmony_ci .unk24 = true, 526bf215546Sopenharmony_ci )); 527bf215546Sopenharmony_ci break; 528bf215546Sopenharmony_ci case MESA_SHADER_COMPUTE: 529bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_CS_CTRL_REG0( 530bf215546Sopenharmony_ci .fullregfootprint = xs->info.max_reg + 1, 531bf215546Sopenharmony_ci .halfregfootprint = xs->info.max_half_reg + 1, 532bf215546Sopenharmony_ci .branchstack = ir3_shader_branchstack_hw(xs), 533bf215546Sopenharmony_ci .mergedregs = xs->mergedregs, 534bf215546Sopenharmony_ci .threadsize = thrsz, 535bf215546Sopenharmony_ci )); 536bf215546Sopenharmony_ci break; 537bf215546Sopenharmony_ci default: 538bf215546Sopenharmony_ci unreachable("bad shader stage"); 539bf215546Sopenharmony_ci } 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_instrlen, 1); 542bf215546Sopenharmony_ci tu_cs_emit(cs, xs->instrlen); 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci /* emit program binary & private memory layout 545bf215546Sopenharmony_ci * binary_iova should be aligned to 1 instrlen unit (128 bytes) 546bf215546Sopenharmony_ci */ 547bf215546Sopenharmony_ci 548bf215546Sopenharmony_ci assert((binary_iova & 0x7f) == 0); 549bf215546Sopenharmony_ci assert((pvtmem->iova & 0x1f) == 0); 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_first_exec_offset, 7); 552bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 553bf215546Sopenharmony_ci tu_cs_emit_qw(cs, binary_iova); 554bf215546Sopenharmony_ci tu_cs_emit(cs, 555bf215546Sopenharmony_ci 
A6XX_SP_VS_PVT_MEM_PARAM_MEMSIZEPERITEM(pvtmem->per_fiber_size)); 556bf215546Sopenharmony_ci tu_cs_emit_qw(cs, pvtmem->iova); 557bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_SIZE_TOTALPVTMEMSIZE(pvtmem->per_sp_size) | 558bf215546Sopenharmony_ci COND(pvtmem->per_wave, A6XX_SP_VS_PVT_MEM_SIZE_PERWAVEMEMLAYOUT)); 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_pvt_mem_hw_stack_offset, 1); 561bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_VS_PVT_MEM_HW_STACK_OFFSET_OFFSET(pvtmem->per_sp_size)); 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci uint32_t shader_preload_size = 564bf215546Sopenharmony_ci MIN2(xs->instrlen, cs->device->physical_device->info->a6xx.instr_cache_size); 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3); 567bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | 568bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | 569bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | 570bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) | 571bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(shader_preload_size)); 572bf215546Sopenharmony_ci tu_cs_emit_qw(cs, binary_iova); 573bf215546Sopenharmony_ci 574bf215546Sopenharmony_ci /* emit immediates */ 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(xs); 577bf215546Sopenharmony_ci uint32_t base = const_state->offsets.immediate; 578bf215546Sopenharmony_ci unsigned immediate_size = tu_xs_get_immediates_packet_size_dwords(xs); 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci if (immediate_size > 0) { 581bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + immediate_size); 582bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) | 583bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 584bf215546Sopenharmony_ci 
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 585bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) | 586bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(immediate_size / 4)); 587bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 588bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci tu_cs_emit_array(cs, const_state->immediates, immediate_size); 591bf215546Sopenharmony_ci } 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci if (const_state->constant_data_ubo != -1) { 594bf215546Sopenharmony_ci uint64_t iova = binary_iova + xs->info.constant_data_offset; 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci /* Upload UBO state for the constant data. */ 597bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 5); 598bf215546Sopenharmony_ci tu_cs_emit(cs, 599bf215546Sopenharmony_ci CP_LOAD_STATE6_0_DST_OFF(const_state->constant_data_ubo) | 600bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_UBO)| 601bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 602bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) | 603bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(1)); 604bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 605bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); 606bf215546Sopenharmony_ci int size_vec4s = DIV_ROUND_UP(xs->constant_data_size, 16); 607bf215546Sopenharmony_ci tu_cs_emit_qw(cs, 608bf215546Sopenharmony_ci iova | 609bf215546Sopenharmony_ci (uint64_t)A6XX_UBO_1_SIZE(size_vec4s) << 32); 610bf215546Sopenharmony_ci 611bf215546Sopenharmony_ci /* Upload the constant data to the const file if needed. 
*/ 612bf215546Sopenharmony_ci const struct ir3_ubo_analysis_state *ubo_state = &const_state->ubo_state; 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci for (int i = 0; i < ubo_state->num_enabled; i++) { 615bf215546Sopenharmony_ci if (ubo_state->range[i].ubo.block != const_state->constant_data_ubo || 616bf215546Sopenharmony_ci ubo_state->range[i].ubo.bindless) { 617bf215546Sopenharmony_ci continue; 618bf215546Sopenharmony_ci } 619bf215546Sopenharmony_ci 620bf215546Sopenharmony_ci uint32_t start = ubo_state->range[i].start; 621bf215546Sopenharmony_ci uint32_t end = ubo_state->range[i].end; 622bf215546Sopenharmony_ci uint32_t size = MIN2(end - start, 623bf215546Sopenharmony_ci (16 * xs->constlen) - ubo_state->range[i].offset); 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3); 626bf215546Sopenharmony_ci tu_cs_emit(cs, 627bf215546Sopenharmony_ci CP_LOAD_STATE6_0_DST_OFF(ubo_state->range[i].offset / 16) | 628bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 629bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | 630bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) | 631bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(size / 16)); 632bf215546Sopenharmony_ci tu_cs_emit_qw(cs, iova + start); 633bf215546Sopenharmony_ci } 634bf215546Sopenharmony_ci } 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci /* emit FS driver param */ 637bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT && const_state->num_driver_params > 0) { 638bf215546Sopenharmony_ci uint32_t base = const_state->offsets.driver_param; 639bf215546Sopenharmony_ci int32_t size = DIV_ROUND_UP(const_state->num_driver_params, 4); 640bf215546Sopenharmony_ci size = MAX2(MIN2(size + base, xs->constlen) - base, 0); 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci if (size > 0) { 643bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, tu6_stage2opcode(stage), 3 + size * 4); 
644bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) | 645bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 646bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 647bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(tu6_stage2shadersb(stage)) | 648bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(size)); 649bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 650bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci assert(size == 1); 653bf215546Sopenharmony_ci tu_cs_emit(cs, xs->info.double_threadsize ? 128 : 64); 654bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 655bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 656bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 657bf215546Sopenharmony_ci } 658bf215546Sopenharmony_ci } 659bf215546Sopenharmony_ci} 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_cistatic void 662bf215546Sopenharmony_citu6_emit_shared_consts_enable(struct tu_cs *cs, bool enable) 663bf215546Sopenharmony_ci{ 664bf215546Sopenharmony_ci /* Enable/disable shared constants */ 665bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_HLSQ_SHARED_CONSTS(.enable = enable)); 666bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_MODE_CONTROL(.constant_demotion_enable = true, 667bf215546Sopenharmony_ci .isammode = ISAMMODE_GL, 668bf215546Sopenharmony_ci .shared_consts_enable = enable)); 669bf215546Sopenharmony_ci} 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_cistatic void 672bf215546Sopenharmony_citu6_emit_cs_config(struct tu_cs *cs, 673bf215546Sopenharmony_ci const struct ir3_shader_variant *v, 674bf215546Sopenharmony_ci const struct tu_pvtmem_config *pvtmem, 675bf215546Sopenharmony_ci uint64_t binary_iova) 676bf215546Sopenharmony_ci{ 677bf215546Sopenharmony_ci bool shared_consts_enable = ir3_const_state(v)->shared_consts_enable; 678bf215546Sopenharmony_ci tu6_emit_shared_consts_enable(cs, shared_consts_enable); 
679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( 681bf215546Sopenharmony_ci .cs_state = true, 682bf215546Sopenharmony_ci .cs_ibo = true, 683bf215546Sopenharmony_ci .cs_shared_const = shared_consts_enable)); 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci tu6_emit_xs_config(cs, MESA_SHADER_COMPUTE, v); 686bf215546Sopenharmony_ci tu6_emit_xs(cs, MESA_SHADER_COMPUTE, v, pvtmem, binary_iova); 687bf215546Sopenharmony_ci 688bf215546Sopenharmony_ci uint32_t shared_size = MAX2(((int)v->shared_size - 1) / 1024, 1); 689bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_UNKNOWN_A9B1, 1); 690bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_CS_UNKNOWN_A9B1_SHARED_SIZE(shared_size) | 691bf215546Sopenharmony_ci A6XX_SP_CS_UNKNOWN_A9B1_UNK6); 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci if (cs->device->physical_device->info->a6xx.has_lpac) { 694bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_UNKNOWN_B9D0, 1); 695bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_HLSQ_CS_UNKNOWN_B9D0_SHARED_SIZE(shared_size) | 696bf215546Sopenharmony_ci A6XX_HLSQ_CS_UNKNOWN_B9D0_UNK6); 697bf215546Sopenharmony_ci } 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci uint32_t local_invocation_id = 700bf215546Sopenharmony_ci ir3_find_sysval_regid(v, SYSTEM_VALUE_LOCAL_INVOCATION_ID); 701bf215546Sopenharmony_ci uint32_t work_group_id = 702bf215546Sopenharmony_ci ir3_find_sysval_regid(v, SYSTEM_VALUE_WORKGROUP_ID); 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci enum a6xx_threadsize thrsz = v->info.double_threadsize ? 
THREAD128 : THREAD64; 705bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CS_CNTL_0, 2); 706bf215546Sopenharmony_ci tu_cs_emit(cs, 707bf215546Sopenharmony_ci A6XX_HLSQ_CS_CNTL_0_WGIDCONSTID(work_group_id) | 708bf215546Sopenharmony_ci A6XX_HLSQ_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | 709bf215546Sopenharmony_ci A6XX_HLSQ_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | 710bf215546Sopenharmony_ci A6XX_HLSQ_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); 711bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_HLSQ_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | 712bf215546Sopenharmony_ci A6XX_HLSQ_CS_CNTL_1_THREADSIZE(thrsz)); 713bf215546Sopenharmony_ci 714bf215546Sopenharmony_ci if (cs->device->physical_device->info->a6xx.has_lpac) { 715bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_CS_CNTL_0, 2); 716bf215546Sopenharmony_ci tu_cs_emit(cs, 717bf215546Sopenharmony_ci A6XX_SP_CS_CNTL_0_WGIDCONSTID(work_group_id) | 718bf215546Sopenharmony_ci A6XX_SP_CS_CNTL_0_WGSIZECONSTID(regid(63, 0)) | 719bf215546Sopenharmony_ci A6XX_SP_CS_CNTL_0_WGOFFSETCONSTID(regid(63, 0)) | 720bf215546Sopenharmony_ci A6XX_SP_CS_CNTL_0_LOCALIDREGID(local_invocation_id)); 721bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_CS_CNTL_1_LINEARLOCALIDREGID(regid(63, 0)) | 722bf215546Sopenharmony_ci A6XX_SP_CS_CNTL_1_THREADSIZE(thrsz)); 723bf215546Sopenharmony_ci } 724bf215546Sopenharmony_ci} 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_cistatic void 727bf215546Sopenharmony_citu6_emit_vs_system_values(struct tu_cs *cs, 728bf215546Sopenharmony_ci const struct ir3_shader_variant *vs, 729bf215546Sopenharmony_ci const struct ir3_shader_variant *hs, 730bf215546Sopenharmony_ci const struct ir3_shader_variant *ds, 731bf215546Sopenharmony_ci const struct ir3_shader_variant *gs, 732bf215546Sopenharmony_ci bool primid_passthru) 733bf215546Sopenharmony_ci{ 734bf215546Sopenharmony_ci const uint32_t vertexid_regid = 735bf215546Sopenharmony_ci ir3_find_sysval_regid(vs, SYSTEM_VALUE_VERTEX_ID); 
736bf215546Sopenharmony_ci const uint32_t instanceid_regid = 737bf215546Sopenharmony_ci ir3_find_sysval_regid(vs, SYSTEM_VALUE_INSTANCE_ID); 738bf215546Sopenharmony_ci const uint32_t tess_coord_x_regid = hs ? 739bf215546Sopenharmony_ci ir3_find_sysval_regid(ds, SYSTEM_VALUE_TESS_COORD) : 740bf215546Sopenharmony_ci regid(63, 0); 741bf215546Sopenharmony_ci const uint32_t tess_coord_y_regid = VALIDREG(tess_coord_x_regid) ? 742bf215546Sopenharmony_ci tess_coord_x_regid + 1 : 743bf215546Sopenharmony_ci regid(63, 0); 744bf215546Sopenharmony_ci const uint32_t hs_rel_patch_regid = hs ? 745bf215546Sopenharmony_ci ir3_find_sysval_regid(hs, SYSTEM_VALUE_REL_PATCH_ID_IR3) : 746bf215546Sopenharmony_ci regid(63, 0); 747bf215546Sopenharmony_ci const uint32_t ds_rel_patch_regid = hs ? 748bf215546Sopenharmony_ci ir3_find_sysval_regid(ds, SYSTEM_VALUE_REL_PATCH_ID_IR3) : 749bf215546Sopenharmony_ci regid(63, 0); 750bf215546Sopenharmony_ci const uint32_t hs_invocation_regid = hs ? 751bf215546Sopenharmony_ci ir3_find_sysval_regid(hs, SYSTEM_VALUE_TCS_HEADER_IR3) : 752bf215546Sopenharmony_ci regid(63, 0); 753bf215546Sopenharmony_ci const uint32_t gs_primitiveid_regid = gs ? 754bf215546Sopenharmony_ci ir3_find_sysval_regid(gs, SYSTEM_VALUE_PRIMITIVE_ID) : 755bf215546Sopenharmony_ci regid(63, 0); 756bf215546Sopenharmony_ci const uint32_t vs_primitiveid_regid = hs ? 757bf215546Sopenharmony_ci ir3_find_sysval_regid(hs, SYSTEM_VALUE_PRIMITIVE_ID) : 758bf215546Sopenharmony_ci gs_primitiveid_regid; 759bf215546Sopenharmony_ci const uint32_t ds_primitiveid_regid = ds ? 760bf215546Sopenharmony_ci ir3_find_sysval_regid(ds, SYSTEM_VALUE_PRIMITIVE_ID) : 761bf215546Sopenharmony_ci regid(63, 0); 762bf215546Sopenharmony_ci const uint32_t gsheader_regid = gs ? 763bf215546Sopenharmony_ci ir3_find_sysval_regid(gs, SYSTEM_VALUE_GS_HEADER_IR3) : 764bf215546Sopenharmony_ci regid(63, 0); 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci /* Note: we currently don't support multiview with tess or GS. 
If we did, 767bf215546Sopenharmony_ci * and the HW actually works, then we'd have to somehow share this across 768bf215546Sopenharmony_ci * stages. Note that the blob doesn't support this either. 769bf215546Sopenharmony_ci */ 770bf215546Sopenharmony_ci const uint32_t viewid_regid = 771bf215546Sopenharmony_ci ir3_find_sysval_regid(vs, SYSTEM_VALUE_VIEW_INDEX); 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_VFD_CONTROL_1, 6); 774bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_VFD_CONTROL_1_REGID4VTX(vertexid_regid) | 775bf215546Sopenharmony_ci A6XX_VFD_CONTROL_1_REGID4INST(instanceid_regid) | 776bf215546Sopenharmony_ci A6XX_VFD_CONTROL_1_REGID4PRIMID(vs_primitiveid_regid) | 777bf215546Sopenharmony_ci A6XX_VFD_CONTROL_1_REGID4VIEWID(viewid_regid)); 778bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_VFD_CONTROL_2_REGID_HSRELPATCHID(hs_rel_patch_regid) | 779bf215546Sopenharmony_ci A6XX_VFD_CONTROL_2_REGID_INVOCATIONID(hs_invocation_regid)); 780bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_VFD_CONTROL_3_REGID_DSRELPATCHID(ds_rel_patch_regid) | 781bf215546Sopenharmony_ci A6XX_VFD_CONTROL_3_REGID_TESSX(tess_coord_x_regid) | 782bf215546Sopenharmony_ci A6XX_VFD_CONTROL_3_REGID_TESSY(tess_coord_y_regid) | 783bf215546Sopenharmony_ci A6XX_VFD_CONTROL_3_REGID_DSPRIMID(ds_primitiveid_regid)); 784bf215546Sopenharmony_ci tu_cs_emit(cs, 0x000000fc); /* VFD_CONTROL_4 */ 785bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_VFD_CONTROL_5_REGID_GSHEADER(gsheader_regid) | 786bf215546Sopenharmony_ci 0xfc00); /* VFD_CONTROL_5 */ 787bf215546Sopenharmony_ci tu_cs_emit(cs, COND(primid_passthru, A6XX_VFD_CONTROL_6_PRIMID_PASSTHRU)); /* VFD_CONTROL_6 */ 788bf215546Sopenharmony_ci} 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_cistatic void 791bf215546Sopenharmony_citu6_setup_streamout(struct tu_cs *cs, 792bf215546Sopenharmony_ci const struct ir3_shader_variant *v, 793bf215546Sopenharmony_ci struct ir3_shader_linkage *l) 794bf215546Sopenharmony_ci{ 
795bf215546Sopenharmony_ci const struct ir3_stream_output_info *info = &v->stream_output; 796bf215546Sopenharmony_ci /* Note: 64 here comes from the HW layout of the program RAM. The program 797bf215546Sopenharmony_ci * for stream N is at DWORD 64 * N. 798bf215546Sopenharmony_ci */ 799bf215546Sopenharmony_ci#define A6XX_SO_PROG_DWORDS 64 800bf215546Sopenharmony_ci uint32_t prog[A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS] = {}; 801bf215546Sopenharmony_ci BITSET_DECLARE(valid_dwords, A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) = {0}; 802bf215546Sopenharmony_ci 803bf215546Sopenharmony_ci /* TODO: streamout state should be in a non-GMEM draw state */ 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci /* no streamout: */ 806bf215546Sopenharmony_ci if (info->num_outputs == 0) { 807bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 4); 808bf215546Sopenharmony_ci tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL); 809bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 810bf215546Sopenharmony_ci tu_cs_emit(cs, REG_A6XX_VPC_SO_STREAM_CNTL); 811bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 812bf215546Sopenharmony_ci return; 813bf215546Sopenharmony_ci } 814bf215546Sopenharmony_ci 815bf215546Sopenharmony_ci for (unsigned i = 0; i < info->num_outputs; i++) { 816bf215546Sopenharmony_ci const struct ir3_stream_output *out = &info->output[i]; 817bf215546Sopenharmony_ci unsigned k = out->register_index; 818bf215546Sopenharmony_ci unsigned idx; 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci /* Skip it, if it's an output that was never assigned a register. */ 821bf215546Sopenharmony_ci if (k >= v->outputs_count || v->outputs[k].regid == INVALID_REG) 822bf215546Sopenharmony_ci continue; 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci /* linkage map sorted by order frag shader wants things, so 825bf215546Sopenharmony_ci * a bit less ideal here.. 
826bf215546Sopenharmony_ci */ 827bf215546Sopenharmony_ci for (idx = 0; idx < l->cnt; idx++) 828bf215546Sopenharmony_ci if (l->var[idx].slot == v->outputs[k].slot) 829bf215546Sopenharmony_ci break; 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_ci assert(idx < l->cnt); 832bf215546Sopenharmony_ci 833bf215546Sopenharmony_ci for (unsigned j = 0; j < out->num_components; j++) { 834bf215546Sopenharmony_ci unsigned c = j + out->start_component; 835bf215546Sopenharmony_ci unsigned loc = l->var[idx].loc + c; 836bf215546Sopenharmony_ci unsigned off = j + out->dst_offset; /* in dwords */ 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci assert(loc < A6XX_SO_PROG_DWORDS * 2); 839bf215546Sopenharmony_ci unsigned dword = out->stream * A6XX_SO_PROG_DWORDS + loc/2; 840bf215546Sopenharmony_ci if (loc & 1) { 841bf215546Sopenharmony_ci prog[dword] |= A6XX_VPC_SO_PROG_B_EN | 842bf215546Sopenharmony_ci A6XX_VPC_SO_PROG_B_BUF(out->output_buffer) | 843bf215546Sopenharmony_ci A6XX_VPC_SO_PROG_B_OFF(off * 4); 844bf215546Sopenharmony_ci } else { 845bf215546Sopenharmony_ci prog[dword] |= A6XX_VPC_SO_PROG_A_EN | 846bf215546Sopenharmony_ci A6XX_VPC_SO_PROG_A_BUF(out->output_buffer) | 847bf215546Sopenharmony_ci A6XX_VPC_SO_PROG_A_OFF(off * 4); 848bf215546Sopenharmony_ci } 849bf215546Sopenharmony_ci BITSET_SET(valid_dwords, dword); 850bf215546Sopenharmony_ci } 851bf215546Sopenharmony_ci } 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci unsigned prog_count = 0; 854bf215546Sopenharmony_ci unsigned start, end; 855bf215546Sopenharmony_ci BITSET_FOREACH_RANGE(start, end, valid_dwords, 856bf215546Sopenharmony_ci A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) { 857bf215546Sopenharmony_ci prog_count += end - start + 1; 858bf215546Sopenharmony_ci } 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_CONTEXT_REG_BUNCH, 10 + 2 * prog_count); 861bf215546Sopenharmony_ci tu_cs_emit(cs, REG_A6XX_VPC_SO_STREAM_CNTL); 862bf215546Sopenharmony_ci tu_cs_emit(cs, 
A6XX_VPC_SO_STREAM_CNTL_STREAM_ENABLE(info->streams_written) | 863bf215546Sopenharmony_ci COND(info->stride[0] > 0, 864bf215546Sopenharmony_ci A6XX_VPC_SO_STREAM_CNTL_BUF0_STREAM(1 + info->buffer_to_stream[0])) | 865bf215546Sopenharmony_ci COND(info->stride[1] > 0, 866bf215546Sopenharmony_ci A6XX_VPC_SO_STREAM_CNTL_BUF1_STREAM(1 + info->buffer_to_stream[1])) | 867bf215546Sopenharmony_ci COND(info->stride[2] > 0, 868bf215546Sopenharmony_ci A6XX_VPC_SO_STREAM_CNTL_BUF2_STREAM(1 + info->buffer_to_stream[2])) | 869bf215546Sopenharmony_ci COND(info->stride[3] > 0, 870bf215546Sopenharmony_ci A6XX_VPC_SO_STREAM_CNTL_BUF3_STREAM(1 + info->buffer_to_stream[3]))); 871bf215546Sopenharmony_ci for (uint32_t i = 0; i < 4; i++) { 872bf215546Sopenharmony_ci tu_cs_emit(cs, REG_A6XX_VPC_SO_BUFFER_STRIDE(i)); 873bf215546Sopenharmony_ci tu_cs_emit(cs, info->stride[i]); 874bf215546Sopenharmony_ci } 875bf215546Sopenharmony_ci bool first = true; 876bf215546Sopenharmony_ci BITSET_FOREACH_RANGE(start, end, valid_dwords, 877bf215546Sopenharmony_ci A6XX_SO_PROG_DWORDS * IR3_MAX_SO_STREAMS) { 878bf215546Sopenharmony_ci tu_cs_emit(cs, REG_A6XX_VPC_SO_CNTL); 879bf215546Sopenharmony_ci tu_cs_emit(cs, COND(first, A6XX_VPC_SO_CNTL_RESET) | 880bf215546Sopenharmony_ci A6XX_VPC_SO_CNTL_ADDR(start)); 881bf215546Sopenharmony_ci for (unsigned i = start; i < end; i++) { 882bf215546Sopenharmony_ci tu_cs_emit(cs, REG_A6XX_VPC_SO_PROG); 883bf215546Sopenharmony_ci tu_cs_emit(cs, prog[i]); 884bf215546Sopenharmony_ci } 885bf215546Sopenharmony_ci first = false; 886bf215546Sopenharmony_ci } 887bf215546Sopenharmony_ci} 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_cistatic void 890bf215546Sopenharmony_citu6_emit_const(struct tu_cs *cs, uint32_t opcode, uint32_t base, 891bf215546Sopenharmony_ci enum a6xx_state_block block, uint32_t offset, 892bf215546Sopenharmony_ci uint32_t size, const uint32_t *dwords) { 893bf215546Sopenharmony_ci assert(size % 4 == 0); 894bf215546Sopenharmony_ci 895bf215546Sopenharmony_ci 
tu_cs_emit_pkt7(cs, opcode, 3 + size); 896bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(base) | 897bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 898bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 899bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(block) | 900bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(size / 4)); 901bf215546Sopenharmony_ci 902bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 903bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); 904bf215546Sopenharmony_ci dwords = (uint32_t *)&((uint8_t *)dwords)[offset]; 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci tu_cs_emit_array(cs, dwords, size); 907bf215546Sopenharmony_ci} 908bf215546Sopenharmony_ci 909bf215546Sopenharmony_cistatic void 910bf215546Sopenharmony_citu6_emit_link_map(struct tu_cs *cs, 911bf215546Sopenharmony_ci const struct ir3_shader_variant *producer, 912bf215546Sopenharmony_ci const struct ir3_shader_variant *consumer, 913bf215546Sopenharmony_ci enum a6xx_state_block sb) 914bf215546Sopenharmony_ci{ 915bf215546Sopenharmony_ci const struct ir3_const_state *const_state = ir3_const_state(consumer); 916bf215546Sopenharmony_ci uint32_t base = const_state->offsets.primitive_map; 917bf215546Sopenharmony_ci int size = DIV_ROUND_UP(consumer->input_size, 4); 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci size = (MIN2(size + base, consumer->constlen) - base) * 4; 920bf215546Sopenharmony_ci if (size <= 0) 921bf215546Sopenharmony_ci return; 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, base, sb, 0, size, 924bf215546Sopenharmony_ci producer->output_loc); 925bf215546Sopenharmony_ci} 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_cistatic uint16_t 928bf215546Sopenharmony_ciprimitive_to_tess(enum shader_prim primitive) { 929bf215546Sopenharmony_ci switch (primitive) { 930bf215546Sopenharmony_ci case SHADER_PRIM_POINTS: 
931bf215546Sopenharmony_ci return TESS_POINTS; 932bf215546Sopenharmony_ci case SHADER_PRIM_LINE_STRIP: 933bf215546Sopenharmony_ci return TESS_LINES; 934bf215546Sopenharmony_ci case SHADER_PRIM_TRIANGLE_STRIP: 935bf215546Sopenharmony_ci return TESS_CW_TRIS; 936bf215546Sopenharmony_ci default: 937bf215546Sopenharmony_ci unreachable(""); 938bf215546Sopenharmony_ci } 939bf215546Sopenharmony_ci} 940bf215546Sopenharmony_ci 941bf215546Sopenharmony_civoid 942bf215546Sopenharmony_citu6_emit_vpc(struct tu_cs *cs, 943bf215546Sopenharmony_ci const struct ir3_shader_variant *vs, 944bf215546Sopenharmony_ci const struct ir3_shader_variant *hs, 945bf215546Sopenharmony_ci const struct ir3_shader_variant *ds, 946bf215546Sopenharmony_ci const struct ir3_shader_variant *gs, 947bf215546Sopenharmony_ci const struct ir3_shader_variant *fs, 948bf215546Sopenharmony_ci uint32_t patch_control_points) 949bf215546Sopenharmony_ci{ 950bf215546Sopenharmony_ci /* note: doesn't compile as static because of the array regs.. 
*/ 951bf215546Sopenharmony_ci const struct reg_config { 952bf215546Sopenharmony_ci uint16_t reg_sp_xs_out_reg; 953bf215546Sopenharmony_ci uint16_t reg_sp_xs_vpc_dst_reg; 954bf215546Sopenharmony_ci uint16_t reg_vpc_xs_pack; 955bf215546Sopenharmony_ci uint16_t reg_vpc_xs_clip_cntl; 956bf215546Sopenharmony_ci uint16_t reg_gras_xs_cl_cntl; 957bf215546Sopenharmony_ci uint16_t reg_pc_xs_out_cntl; 958bf215546Sopenharmony_ci uint16_t reg_sp_xs_primitive_cntl; 959bf215546Sopenharmony_ci uint16_t reg_vpc_xs_layer_cntl; 960bf215546Sopenharmony_ci uint16_t reg_gras_xs_layer_cntl; 961bf215546Sopenharmony_ci } reg_config[] = { 962bf215546Sopenharmony_ci [MESA_SHADER_VERTEX] = { 963bf215546Sopenharmony_ci REG_A6XX_SP_VS_OUT_REG(0), 964bf215546Sopenharmony_ci REG_A6XX_SP_VS_VPC_DST_REG(0), 965bf215546Sopenharmony_ci REG_A6XX_VPC_VS_PACK, 966bf215546Sopenharmony_ci REG_A6XX_VPC_VS_CLIP_CNTL, 967bf215546Sopenharmony_ci REG_A6XX_GRAS_VS_CL_CNTL, 968bf215546Sopenharmony_ci REG_A6XX_PC_VS_OUT_CNTL, 969bf215546Sopenharmony_ci REG_A6XX_SP_VS_PRIMITIVE_CNTL, 970bf215546Sopenharmony_ci REG_A6XX_VPC_VS_LAYER_CNTL, 971bf215546Sopenharmony_ci REG_A6XX_GRAS_VS_LAYER_CNTL 972bf215546Sopenharmony_ci }, 973bf215546Sopenharmony_ci [MESA_SHADER_TESS_CTRL] = { 974bf215546Sopenharmony_ci 0, 975bf215546Sopenharmony_ci 0, 976bf215546Sopenharmony_ci 0, 977bf215546Sopenharmony_ci 0, 978bf215546Sopenharmony_ci 0, 979bf215546Sopenharmony_ci REG_A6XX_PC_HS_OUT_CNTL, 980bf215546Sopenharmony_ci 0, 981bf215546Sopenharmony_ci 0, 982bf215546Sopenharmony_ci 0 983bf215546Sopenharmony_ci }, 984bf215546Sopenharmony_ci [MESA_SHADER_TESS_EVAL] = { 985bf215546Sopenharmony_ci REG_A6XX_SP_DS_OUT_REG(0), 986bf215546Sopenharmony_ci REG_A6XX_SP_DS_VPC_DST_REG(0), 987bf215546Sopenharmony_ci REG_A6XX_VPC_DS_PACK, 988bf215546Sopenharmony_ci REG_A6XX_VPC_DS_CLIP_CNTL, 989bf215546Sopenharmony_ci REG_A6XX_GRAS_DS_CL_CNTL, 990bf215546Sopenharmony_ci REG_A6XX_PC_DS_OUT_CNTL, 991bf215546Sopenharmony_ci REG_A6XX_SP_DS_PRIMITIVE_CNTL, 
992bf215546Sopenharmony_ci REG_A6XX_VPC_DS_LAYER_CNTL, 993bf215546Sopenharmony_ci REG_A6XX_GRAS_DS_LAYER_CNTL 994bf215546Sopenharmony_ci }, 995bf215546Sopenharmony_ci [MESA_SHADER_GEOMETRY] = { 996bf215546Sopenharmony_ci REG_A6XX_SP_GS_OUT_REG(0), 997bf215546Sopenharmony_ci REG_A6XX_SP_GS_VPC_DST_REG(0), 998bf215546Sopenharmony_ci REG_A6XX_VPC_GS_PACK, 999bf215546Sopenharmony_ci REG_A6XX_VPC_GS_CLIP_CNTL, 1000bf215546Sopenharmony_ci REG_A6XX_GRAS_GS_CL_CNTL, 1001bf215546Sopenharmony_ci REG_A6XX_PC_GS_OUT_CNTL, 1002bf215546Sopenharmony_ci REG_A6XX_SP_GS_PRIMITIVE_CNTL, 1003bf215546Sopenharmony_ci REG_A6XX_VPC_GS_LAYER_CNTL, 1004bf215546Sopenharmony_ci REG_A6XX_GRAS_GS_LAYER_CNTL 1005bf215546Sopenharmony_ci }, 1006bf215546Sopenharmony_ci }; 1007bf215546Sopenharmony_ci 1008bf215546Sopenharmony_ci const struct ir3_shader_variant *last_shader; 1009bf215546Sopenharmony_ci if (gs) { 1010bf215546Sopenharmony_ci last_shader = gs; 1011bf215546Sopenharmony_ci } else if (hs) { 1012bf215546Sopenharmony_ci last_shader = ds; 1013bf215546Sopenharmony_ci } else { 1014bf215546Sopenharmony_ci last_shader = vs; 1015bf215546Sopenharmony_ci } 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci const struct reg_config *cfg = &reg_config[last_shader->type]; 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci struct ir3_shader_linkage linkage = { 1020bf215546Sopenharmony_ci .primid_loc = 0xff, 1021bf215546Sopenharmony_ci .clip0_loc = 0xff, 1022bf215546Sopenharmony_ci .clip1_loc = 0xff, 1023bf215546Sopenharmony_ci }; 1024bf215546Sopenharmony_ci if (fs) 1025bf215546Sopenharmony_ci ir3_link_shaders(&linkage, last_shader, fs, true); 1026bf215546Sopenharmony_ci 1027bf215546Sopenharmony_ci if (last_shader->stream_output.num_outputs) 1028bf215546Sopenharmony_ci ir3_link_stream_out(&linkage, last_shader); 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_ci /* We do this after linking shaders in order to know whether PrimID 1031bf215546Sopenharmony_ci * passthrough needs to be enabled. 
1032bf215546Sopenharmony_ci */ 1033bf215546Sopenharmony_ci bool primid_passthru = linkage.primid_loc != 0xff; 1034bf215546Sopenharmony_ci tu6_emit_vs_system_values(cs, vs, hs, ds, gs, primid_passthru); 1035bf215546Sopenharmony_ci 1036bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VAR_DISABLE(0), 4); 1037bf215546Sopenharmony_ci tu_cs_emit(cs, ~linkage.varmask[0]); 1038bf215546Sopenharmony_ci tu_cs_emit(cs, ~linkage.varmask[1]); 1039bf215546Sopenharmony_ci tu_cs_emit(cs, ~linkage.varmask[2]); 1040bf215546Sopenharmony_ci tu_cs_emit(cs, ~linkage.varmask[3]); 1041bf215546Sopenharmony_ci 1042bf215546Sopenharmony_ci /* a6xx finds position/pointsize at the end */ 1043bf215546Sopenharmony_ci const uint32_t pointsize_regid = 1044bf215546Sopenharmony_ci ir3_find_output_regid(last_shader, VARYING_SLOT_PSIZ); 1045bf215546Sopenharmony_ci const uint32_t layer_regid = 1046bf215546Sopenharmony_ci ir3_find_output_regid(last_shader, VARYING_SLOT_LAYER); 1047bf215546Sopenharmony_ci const uint32_t view_regid = 1048bf215546Sopenharmony_ci ir3_find_output_regid(last_shader, VARYING_SLOT_VIEWPORT); 1049bf215546Sopenharmony_ci const uint32_t clip0_regid = 1050bf215546Sopenharmony_ci ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST0); 1051bf215546Sopenharmony_ci const uint32_t clip1_regid = 1052bf215546Sopenharmony_ci ir3_find_output_regid(last_shader, VARYING_SLOT_CLIP_DIST1); 1053bf215546Sopenharmony_ci uint32_t flags_regid = gs ? 
1054bf215546Sopenharmony_ci ir3_find_output_regid(gs, VARYING_SLOT_GS_VERTEX_FLAGS_IR3) : 0; 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_ci uint32_t pointsize_loc = 0xff, position_loc = 0xff, layer_loc = 0xff, view_loc = 0xff; 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci if (layer_regid != regid(63, 0)) { 1059bf215546Sopenharmony_ci layer_loc = linkage.max_loc; 1060bf215546Sopenharmony_ci ir3_link_add(&linkage, VARYING_SLOT_LAYER, layer_regid, 0x1, linkage.max_loc); 1061bf215546Sopenharmony_ci } 1062bf215546Sopenharmony_ci 1063bf215546Sopenharmony_ci if (view_regid != regid(63, 0)) { 1064bf215546Sopenharmony_ci view_loc = linkage.max_loc; 1065bf215546Sopenharmony_ci ir3_link_add(&linkage, VARYING_SLOT_VIEWPORT, view_regid, 0x1, linkage.max_loc); 1066bf215546Sopenharmony_ci } 1067bf215546Sopenharmony_ci 1068bf215546Sopenharmony_ci unsigned extra_pos = 0; 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci for (unsigned i = 0; i < last_shader->outputs_count; i++) { 1071bf215546Sopenharmony_ci if (last_shader->outputs[i].slot != VARYING_SLOT_POS) 1072bf215546Sopenharmony_ci continue; 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci if (position_loc == 0xff) 1075bf215546Sopenharmony_ci position_loc = linkage.max_loc; 1076bf215546Sopenharmony_ci 1077bf215546Sopenharmony_ci ir3_link_add(&linkage, last_shader->outputs[i].slot, 1078bf215546Sopenharmony_ci last_shader->outputs[i].regid, 1079bf215546Sopenharmony_ci 0xf, position_loc + 4 * last_shader->outputs[i].view); 1080bf215546Sopenharmony_ci extra_pos = MAX2(extra_pos, last_shader->outputs[i].view); 1081bf215546Sopenharmony_ci } 1082bf215546Sopenharmony_ci 1083bf215546Sopenharmony_ci if (pointsize_regid != regid(63, 0)) { 1084bf215546Sopenharmony_ci pointsize_loc = linkage.max_loc; 1085bf215546Sopenharmony_ci ir3_link_add(&linkage, VARYING_SLOT_PSIZ, pointsize_regid, 0x1, linkage.max_loc); 1086bf215546Sopenharmony_ci } 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci uint8_t 
clip_cull_mask = last_shader->clip_mask | last_shader->cull_mask; 1089bf215546Sopenharmony_ci 1090bf215546Sopenharmony_ci /* Handle the case where clip/cull distances aren't read by the FS */ 1091bf215546Sopenharmony_ci uint32_t clip0_loc = linkage.clip0_loc, clip1_loc = linkage.clip1_loc; 1092bf215546Sopenharmony_ci if (clip0_loc == 0xff && clip0_regid != regid(63, 0)) { 1093bf215546Sopenharmony_ci clip0_loc = linkage.max_loc; 1094bf215546Sopenharmony_ci ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST0, clip0_regid, 1095bf215546Sopenharmony_ci clip_cull_mask & 0xf, linkage.max_loc); 1096bf215546Sopenharmony_ci } 1097bf215546Sopenharmony_ci if (clip1_loc == 0xff && clip1_regid != regid(63, 0)) { 1098bf215546Sopenharmony_ci clip1_loc = linkage.max_loc; 1099bf215546Sopenharmony_ci ir3_link_add(&linkage, VARYING_SLOT_CLIP_DIST1, clip1_regid, 1100bf215546Sopenharmony_ci clip_cull_mask >> 4, linkage.max_loc); 1101bf215546Sopenharmony_ci } 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci tu6_setup_streamout(cs, last_shader, &linkage); 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci /* The GPU hangs on some models when there are no outputs (xs_pack::CNT), 1106bf215546Sopenharmony_ci * at least when a DS is the last stage, so add a dummy output to keep it 1107bf215546Sopenharmony_ci * happy if there aren't any. We do this late in order to avoid emitting 1108bf215546Sopenharmony_ci * any unused code and make sure that optimizations don't remove it. 
1109bf215546Sopenharmony_ci */ 1110bf215546Sopenharmony_ci if (linkage.cnt == 0) 1111bf215546Sopenharmony_ci ir3_link_add(&linkage, 0, 0, 0x1, linkage.max_loc); 1112bf215546Sopenharmony_ci 1113bf215546Sopenharmony_ci /* map outputs of the last shader to VPC */ 1114bf215546Sopenharmony_ci assert(linkage.cnt <= 32); 1115bf215546Sopenharmony_ci const uint32_t sp_out_count = DIV_ROUND_UP(linkage.cnt, 2); 1116bf215546Sopenharmony_ci const uint32_t sp_vpc_dst_count = DIV_ROUND_UP(linkage.cnt, 4); 1117bf215546Sopenharmony_ci uint32_t sp_out[16] = {0}; 1118bf215546Sopenharmony_ci uint32_t sp_vpc_dst[8] = {0}; 1119bf215546Sopenharmony_ci for (uint32_t i = 0; i < linkage.cnt; i++) { 1120bf215546Sopenharmony_ci ((uint16_t *) sp_out)[i] = 1121bf215546Sopenharmony_ci A6XX_SP_VS_OUT_REG_A_REGID(linkage.var[i].regid) | 1122bf215546Sopenharmony_ci A6XX_SP_VS_OUT_REG_A_COMPMASK(linkage.var[i].compmask); 1123bf215546Sopenharmony_ci ((uint8_t *) sp_vpc_dst)[i] = 1124bf215546Sopenharmony_ci A6XX_SP_VS_VPC_DST_REG_OUTLOC0(linkage.var[i].loc); 1125bf215546Sopenharmony_ci } 1126bf215546Sopenharmony_ci 1127bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_out_reg, sp_out_count); 1128bf215546Sopenharmony_ci tu_cs_emit_array(cs, sp_out, sp_out_count); 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_vpc_dst_reg, sp_vpc_dst_count); 1131bf215546Sopenharmony_ci tu_cs_emit_array(cs, sp_vpc_dst, sp_vpc_dst_count); 1132bf215546Sopenharmony_ci 1133bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_pack, 1); 1134bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_VPC_VS_PACK_POSITIONLOC(position_loc) | 1135bf215546Sopenharmony_ci A6XX_VPC_VS_PACK_PSIZELOC(pointsize_loc) | 1136bf215546Sopenharmony_ci A6XX_VPC_VS_PACK_STRIDE_IN_VPC(linkage.max_loc) | 1137bf215546Sopenharmony_ci A6XX_VPC_VS_PACK_EXTRAPOS(extra_pos)); 1138bf215546Sopenharmony_ci 1139bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_clip_cntl, 1); 1140bf215546Sopenharmony_ci 
tu_cs_emit(cs, A6XX_VPC_VS_CLIP_CNTL_CLIP_MASK(clip_cull_mask) | 1141bf215546Sopenharmony_ci A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_03_LOC(clip0_loc) | 1142bf215546Sopenharmony_ci A6XX_VPC_VS_CLIP_CNTL_CLIP_DIST_47_LOC(clip1_loc)); 1143bf215546Sopenharmony_ci 1144bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_cl_cntl, 1); 1145bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_GRAS_VS_CL_CNTL_CLIP_MASK(last_shader->clip_mask) | 1146bf215546Sopenharmony_ci A6XX_GRAS_VS_CL_CNTL_CULL_MASK(last_shader->cull_mask)); 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci const struct ir3_shader_variant *geom_shaders[] = { vs, hs, ds, gs }; 1149bf215546Sopenharmony_ci 1150bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(geom_shaders); i++) { 1151bf215546Sopenharmony_ci const struct ir3_shader_variant *shader = geom_shaders[i]; 1152bf215546Sopenharmony_ci if (!shader) 1153bf215546Sopenharmony_ci continue; 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci bool primid = shader->type != MESA_SHADER_VERTEX && 1156bf215546Sopenharmony_ci VALIDREG(ir3_find_sysval_regid(shader, SYSTEM_VALUE_PRIMITIVE_ID)); 1157bf215546Sopenharmony_ci 1158bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, reg_config[shader->type].reg_pc_xs_out_cntl, 1); 1159bf215546Sopenharmony_ci if (shader == last_shader) { 1160bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_PC_VS_OUT_CNTL_STRIDE_IN_VPC(linkage.max_loc) | 1161bf215546Sopenharmony_ci CONDREG(pointsize_regid, A6XX_PC_VS_OUT_CNTL_PSIZE) | 1162bf215546Sopenharmony_ci CONDREG(layer_regid, A6XX_PC_VS_OUT_CNTL_LAYER) | 1163bf215546Sopenharmony_ci CONDREG(view_regid, A6XX_PC_VS_OUT_CNTL_VIEW) | 1164bf215546Sopenharmony_ci COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID) | 1165bf215546Sopenharmony_ci A6XX_PC_VS_OUT_CNTL_CLIP_MASK(clip_cull_mask)); 1166bf215546Sopenharmony_ci } else { 1167bf215546Sopenharmony_ci tu_cs_emit(cs, COND(primid, A6XX_PC_VS_OUT_CNTL_PRIMITIVE_ID)); 1168bf215546Sopenharmony_ci } 1169bf215546Sopenharmony_ci } 
1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_ci /* if vertex_flags somehow gets optimized out, your gonna have a bad time: */ 1172bf215546Sopenharmony_ci if (gs) 1173bf215546Sopenharmony_ci assert(flags_regid != INVALID_REG); 1174bf215546Sopenharmony_ci 1175bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_sp_xs_primitive_cntl, 1); 1176bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_VS_PRIMITIVE_CNTL_OUT(linkage.cnt) | 1177bf215546Sopenharmony_ci A6XX_SP_GS_PRIMITIVE_CNTL_FLAGS_REGID(flags_regid)); 1178bf215546Sopenharmony_ci 1179bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_vpc_xs_layer_cntl, 1); 1180bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_VPC_VS_LAYER_CNTL_LAYERLOC(layer_loc) | 1181bf215546Sopenharmony_ci A6XX_VPC_VS_LAYER_CNTL_VIEWLOC(view_loc)); 1182bf215546Sopenharmony_ci 1183bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, cfg->reg_gras_xs_layer_cntl, 1); 1184bf215546Sopenharmony_ci tu_cs_emit(cs, CONDREG(layer_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_LAYER) | 1185bf215546Sopenharmony_ci CONDREG(view_regid, A6XX_GRAS_GS_LAYER_CNTL_WRITES_VIEW)); 1186bf215546Sopenharmony_ci 1187bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_PC_PRIMID_PASSTHRU(primid_passthru)); 1188bf215546Sopenharmony_ci 1189bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_VPC_CNTL_0, 1); 1190bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_VPC_CNTL_0_NUMNONPOSVAR(fs ? fs->total_in : 0) | 1191bf215546Sopenharmony_ci COND(fs && fs->total_in, A6XX_VPC_CNTL_0_VARYING) | 1192bf215546Sopenharmony_ci A6XX_VPC_CNTL_0_PRIMIDLOC(linkage.primid_loc) | 1193bf215546Sopenharmony_ci A6XX_VPC_CNTL_0_VIEWIDLOC(linkage.viewid_loc)); 1194bf215546Sopenharmony_ci 1195bf215546Sopenharmony_ci if (hs) { 1196bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_NUM_VERTEX, 1); 1197bf215546Sopenharmony_ci tu_cs_emit(cs, hs->tess.tcs_vertices_out); 1198bf215546Sopenharmony_ci 1199bf215546Sopenharmony_ci /* Total attribute slots in HS incoming patch. 
*/ 1200bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_PC_HS_INPUT_SIZE, 1); 1201bf215546Sopenharmony_ci tu_cs_emit(cs, patch_control_points * vs->output_size / 4); 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci const uint32_t wavesize = 64; 1204bf215546Sopenharmony_ci const uint32_t max_wave_input_size = 64; 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_ci /* note: if HS is really just the VS extended, then this 1207bf215546Sopenharmony_ci * should be by MAX2(patch_control_points, hs->tess.tcs_vertices_out) 1208bf215546Sopenharmony_ci * however that doesn't match the blob, and fails some dEQP tests. 1209bf215546Sopenharmony_ci */ 1210bf215546Sopenharmony_ci uint32_t prims_per_wave = wavesize / hs->tess.tcs_vertices_out; 1211bf215546Sopenharmony_ci uint32_t max_prims_per_wave = 1212bf215546Sopenharmony_ci max_wave_input_size * wavesize / (vs->output_size * patch_control_points); 1213bf215546Sopenharmony_ci prims_per_wave = MIN2(prims_per_wave, max_prims_per_wave); 1214bf215546Sopenharmony_ci 1215bf215546Sopenharmony_ci uint32_t total_size = vs->output_size * patch_control_points * prims_per_wave; 1216bf215546Sopenharmony_ci uint32_t wave_input_size = DIV_ROUND_UP(total_size, wavesize); 1217bf215546Sopenharmony_ci 1218bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1); 1219bf215546Sopenharmony_ci tu_cs_emit(cs, wave_input_size); 1220bf215546Sopenharmony_ci 1221bf215546Sopenharmony_ci /* In SPIR-V generated from GLSL, the tessellation primitive params are 1222bf215546Sopenharmony_ci * are specified in the tess eval shader, but in SPIR-V generated from 1223bf215546Sopenharmony_ci * HLSL, they are specified in the tess control shader. */ 1224bf215546Sopenharmony_ci const struct ir3_shader_variant *tess = 1225bf215546Sopenharmony_ci ds->tess.spacing == TESS_SPACING_UNSPECIFIED ? 
hs : ds; 1226bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_PC_TESS_CNTL, 1); 1227bf215546Sopenharmony_ci uint32_t output; 1228bf215546Sopenharmony_ci if (tess->tess.point_mode) 1229bf215546Sopenharmony_ci output = TESS_POINTS; 1230bf215546Sopenharmony_ci else if (tess->tess.primitive_mode == TESS_PRIMITIVE_ISOLINES) 1231bf215546Sopenharmony_ci output = TESS_LINES; 1232bf215546Sopenharmony_ci else if (tess->tess.ccw) 1233bf215546Sopenharmony_ci output = TESS_CCW_TRIS; 1234bf215546Sopenharmony_ci else 1235bf215546Sopenharmony_ci output = TESS_CW_TRIS; 1236bf215546Sopenharmony_ci 1237bf215546Sopenharmony_ci enum a6xx_tess_spacing spacing; 1238bf215546Sopenharmony_ci switch (tess->tess.spacing) { 1239bf215546Sopenharmony_ci case TESS_SPACING_EQUAL: 1240bf215546Sopenharmony_ci spacing = TESS_EQUAL; 1241bf215546Sopenharmony_ci break; 1242bf215546Sopenharmony_ci case TESS_SPACING_FRACTIONAL_ODD: 1243bf215546Sopenharmony_ci spacing = TESS_FRACTIONAL_ODD; 1244bf215546Sopenharmony_ci break; 1245bf215546Sopenharmony_ci case TESS_SPACING_FRACTIONAL_EVEN: 1246bf215546Sopenharmony_ci spacing = TESS_FRACTIONAL_EVEN; 1247bf215546Sopenharmony_ci break; 1248bf215546Sopenharmony_ci case TESS_SPACING_UNSPECIFIED: 1249bf215546Sopenharmony_ci default: 1250bf215546Sopenharmony_ci unreachable("invalid tess spacing"); 1251bf215546Sopenharmony_ci } 1252bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_PC_TESS_CNTL_SPACING(spacing) | 1253bf215546Sopenharmony_ci A6XX_PC_TESS_CNTL_OUTPUT(output)); 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_ci tu6_emit_link_map(cs, vs, hs, SB6_HS_SHADER); 1256bf215546Sopenharmony_ci tu6_emit_link_map(cs, hs, ds, SB6_DS_SHADER); 1257bf215546Sopenharmony_ci } 1258bf215546Sopenharmony_ci 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_ci if (gs) { 1261bf215546Sopenharmony_ci uint32_t vertices_out, invocations, output, vec4_size; 1262bf215546Sopenharmony_ci uint32_t prev_stage_output_size = ds ? 
ds->output_size : vs->output_size; 1263bf215546Sopenharmony_ci 1264bf215546Sopenharmony_ci if (hs) { 1265bf215546Sopenharmony_ci tu6_emit_link_map(cs, ds, gs, SB6_GS_SHADER); 1266bf215546Sopenharmony_ci } else { 1267bf215546Sopenharmony_ci tu6_emit_link_map(cs, vs, gs, SB6_GS_SHADER); 1268bf215546Sopenharmony_ci } 1269bf215546Sopenharmony_ci vertices_out = gs->gs.vertices_out - 1; 1270bf215546Sopenharmony_ci output = primitive_to_tess(gs->gs.output_primitive); 1271bf215546Sopenharmony_ci invocations = gs->gs.invocations - 1; 1272bf215546Sopenharmony_ci /* Size of per-primitive alloction in ldlw memory in vec4s. */ 1273bf215546Sopenharmony_ci vec4_size = gs->gs.vertices_in * 1274bf215546Sopenharmony_ci DIV_ROUND_UP(prev_stage_output_size, 4); 1275bf215546Sopenharmony_ci 1276bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_5, 1); 1277bf215546Sopenharmony_ci tu_cs_emit(cs, 1278bf215546Sopenharmony_ci A6XX_PC_PRIMITIVE_CNTL_5_GS_VERTICES_OUT(vertices_out) | 1279bf215546Sopenharmony_ci A6XX_PC_PRIMITIVE_CNTL_5_GS_OUTPUT(output) | 1280bf215546Sopenharmony_ci A6XX_PC_PRIMITIVE_CNTL_5_GS_INVOCATIONS(invocations)); 1281bf215546Sopenharmony_ci 1282bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_VPC_GS_PARAM, 1); 1283bf215546Sopenharmony_ci tu_cs_emit(cs, 0xff); 1284bf215546Sopenharmony_ci 1285bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_PC_PRIMITIVE_CNTL_6, 1); 1286bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_PC_PRIMITIVE_CNTL_6_STRIDE_IN_VPC(vec4_size)); 1287bf215546Sopenharmony_ci 1288bf215546Sopenharmony_ci uint32_t prim_size = prev_stage_output_size; 1289bf215546Sopenharmony_ci if (prim_size > 64) 1290bf215546Sopenharmony_ci prim_size = 64; 1291bf215546Sopenharmony_ci else if (prim_size == 64) 1292bf215546Sopenharmony_ci prim_size = 63; 1293bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_GS_PRIM_SIZE, 1); 1294bf215546Sopenharmony_ci tu_cs_emit(cs, prim_size); 1295bf215546Sopenharmony_ci } 1296bf215546Sopenharmony_ci} 
1297bf215546Sopenharmony_ci 1298bf215546Sopenharmony_cistatic int 1299bf215546Sopenharmony_citu6_vpc_varying_mode(const struct ir3_shader_variant *fs, 1300bf215546Sopenharmony_ci uint32_t index, 1301bf215546Sopenharmony_ci uint8_t *interp_mode, 1302bf215546Sopenharmony_ci uint8_t *ps_repl_mode) 1303bf215546Sopenharmony_ci{ 1304bf215546Sopenharmony_ci enum 1305bf215546Sopenharmony_ci { 1306bf215546Sopenharmony_ci INTERP_SMOOTH = 0, 1307bf215546Sopenharmony_ci INTERP_FLAT = 1, 1308bf215546Sopenharmony_ci INTERP_ZERO = 2, 1309bf215546Sopenharmony_ci INTERP_ONE = 3, 1310bf215546Sopenharmony_ci }; 1311bf215546Sopenharmony_ci enum 1312bf215546Sopenharmony_ci { 1313bf215546Sopenharmony_ci PS_REPL_NONE = 0, 1314bf215546Sopenharmony_ci PS_REPL_S = 1, 1315bf215546Sopenharmony_ci PS_REPL_T = 2, 1316bf215546Sopenharmony_ci PS_REPL_ONE_MINUS_T = 3, 1317bf215546Sopenharmony_ci }; 1318bf215546Sopenharmony_ci 1319bf215546Sopenharmony_ci const uint32_t compmask = fs->inputs[index].compmask; 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_ci /* NOTE: varyings are packed, so if compmask is 0xb then first, second, and 1322bf215546Sopenharmony_ci * fourth component occupy three consecutive varying slots 1323bf215546Sopenharmony_ci */ 1324bf215546Sopenharmony_ci int shift = 0; 1325bf215546Sopenharmony_ci *interp_mode = 0; 1326bf215546Sopenharmony_ci *ps_repl_mode = 0; 1327bf215546Sopenharmony_ci if (fs->inputs[index].slot == VARYING_SLOT_PNTC) { 1328bf215546Sopenharmony_ci if (compmask & 0x1) { 1329bf215546Sopenharmony_ci *ps_repl_mode |= PS_REPL_S << shift; 1330bf215546Sopenharmony_ci shift += 2; 1331bf215546Sopenharmony_ci } 1332bf215546Sopenharmony_ci if (compmask & 0x2) { 1333bf215546Sopenharmony_ci *ps_repl_mode |= PS_REPL_T << shift; 1334bf215546Sopenharmony_ci shift += 2; 1335bf215546Sopenharmony_ci } 1336bf215546Sopenharmony_ci if (compmask & 0x4) { 1337bf215546Sopenharmony_ci *interp_mode |= INTERP_ZERO << shift; 1338bf215546Sopenharmony_ci shift += 2; 
1339bf215546Sopenharmony_ci } 1340bf215546Sopenharmony_ci if (compmask & 0x8) { 1341bf215546Sopenharmony_ci *interp_mode |= INTERP_ONE << 6; 1342bf215546Sopenharmony_ci shift += 2; 1343bf215546Sopenharmony_ci } 1344bf215546Sopenharmony_ci } else if (fs->inputs[index].flat) { 1345bf215546Sopenharmony_ci for (int i = 0; i < 4; i++) { 1346bf215546Sopenharmony_ci if (compmask & (1 << i)) { 1347bf215546Sopenharmony_ci *interp_mode |= INTERP_FLAT << shift; 1348bf215546Sopenharmony_ci shift += 2; 1349bf215546Sopenharmony_ci } 1350bf215546Sopenharmony_ci } 1351bf215546Sopenharmony_ci } 1352bf215546Sopenharmony_ci 1353bf215546Sopenharmony_ci return shift; 1354bf215546Sopenharmony_ci} 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_cistatic void 1357bf215546Sopenharmony_citu6_emit_vpc_varying_modes(struct tu_cs *cs, 1358bf215546Sopenharmony_ci const struct ir3_shader_variant *fs) 1359bf215546Sopenharmony_ci{ 1360bf215546Sopenharmony_ci uint32_t interp_modes[8] = { 0 }; 1361bf215546Sopenharmony_ci uint32_t ps_repl_modes[8] = { 0 }; 1362bf215546Sopenharmony_ci 1363bf215546Sopenharmony_ci if (fs) { 1364bf215546Sopenharmony_ci for (int i = -1; 1365bf215546Sopenharmony_ci (i = ir3_next_varying(fs, i)) < (int) fs->inputs_count;) { 1366bf215546Sopenharmony_ci 1367bf215546Sopenharmony_ci /* get the mode for input i */ 1368bf215546Sopenharmony_ci uint8_t interp_mode; 1369bf215546Sopenharmony_ci uint8_t ps_repl_mode; 1370bf215546Sopenharmony_ci const int bits = 1371bf215546Sopenharmony_ci tu6_vpc_varying_mode(fs, i, &interp_mode, &ps_repl_mode); 1372bf215546Sopenharmony_ci 1373bf215546Sopenharmony_ci /* OR the mode into the array */ 1374bf215546Sopenharmony_ci const uint32_t inloc = fs->inputs[i].inloc * 2; 1375bf215546Sopenharmony_ci uint32_t n = inloc / 32; 1376bf215546Sopenharmony_ci uint32_t shift = inloc % 32; 1377bf215546Sopenharmony_ci interp_modes[n] |= interp_mode << shift; 1378bf215546Sopenharmony_ci ps_repl_modes[n] |= ps_repl_mode << shift; 1379bf215546Sopenharmony_ci 
if (shift + bits > 32) { 1380bf215546Sopenharmony_ci n++; 1381bf215546Sopenharmony_ci shift = 32 - shift; 1382bf215546Sopenharmony_ci 1383bf215546Sopenharmony_ci interp_modes[n] |= interp_mode >> shift; 1384bf215546Sopenharmony_ci ps_repl_modes[n] |= ps_repl_mode >> shift; 1385bf215546Sopenharmony_ci } 1386bf215546Sopenharmony_ci } 1387bf215546Sopenharmony_ci } 1388bf215546Sopenharmony_ci 1389bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_INTERP_MODE(0), 8); 1390bf215546Sopenharmony_ci tu_cs_emit_array(cs, interp_modes, 8); 1391bf215546Sopenharmony_ci 1392bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_VPC_VARYING_PS_REPL_MODE(0), 8); 1393bf215546Sopenharmony_ci tu_cs_emit_array(cs, ps_repl_modes, 8); 1394bf215546Sopenharmony_ci} 1395bf215546Sopenharmony_ci 1396bf215546Sopenharmony_civoid 1397bf215546Sopenharmony_citu6_emit_fs_inputs(struct tu_cs *cs, const struct ir3_shader_variant *fs) 1398bf215546Sopenharmony_ci{ 1399bf215546Sopenharmony_ci uint32_t face_regid, coord_regid, zwcoord_regid, samp_id_regid; 1400bf215546Sopenharmony_ci uint32_t ij_regid[IJ_COUNT]; 1401bf215546Sopenharmony_ci uint32_t smask_in_regid; 1402bf215546Sopenharmony_ci 1403bf215546Sopenharmony_ci bool sample_shading = fs->per_samp | fs->key.sample_shading; 1404bf215546Sopenharmony_ci bool enable_varyings = fs->total_in > 0; 1405bf215546Sopenharmony_ci 1406bf215546Sopenharmony_ci samp_id_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_ID); 1407bf215546Sopenharmony_ci smask_in_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_SAMPLE_MASK_IN); 1408bf215546Sopenharmony_ci face_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRONT_FACE); 1409bf215546Sopenharmony_ci coord_regid = ir3_find_sysval_regid(fs, SYSTEM_VALUE_FRAG_COORD); 1410bf215546Sopenharmony_ci zwcoord_regid = VALIDREG(coord_regid) ? 
coord_regid + 2 : regid(63, 0); 1411bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(ij_regid); i++) 1412bf215546Sopenharmony_ci ij_regid[i] = ir3_find_sysval_regid(fs, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL + i); 1413bf215546Sopenharmony_ci 1414bf215546Sopenharmony_ci if (fs->num_sampler_prefetch > 0) { 1415bf215546Sopenharmony_ci assert(VALIDREG(ij_regid[IJ_PERSP_PIXEL])); 1416bf215546Sopenharmony_ci /* also, it seems like ij_pix is *required* to be r0.x */ 1417bf215546Sopenharmony_ci assert(ij_regid[IJ_PERSP_PIXEL] == regid(0, 0)); 1418bf215546Sopenharmony_ci } 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_PREFETCH_CNTL, 1 + fs->num_sampler_prefetch); 1421bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CNTL_COUNT(fs->num_sampler_prefetch) | 1422bf215546Sopenharmony_ci A6XX_SP_FS_PREFETCH_CNTL_UNK4(regid(63, 0)) | 1423bf215546Sopenharmony_ci 0x7000); // XXX); 1424bf215546Sopenharmony_ci for (int i = 0; i < fs->num_sampler_prefetch; i++) { 1425bf215546Sopenharmony_ci const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i]; 1426bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_PREFETCH_CMD_SRC(prefetch->src) | 1427bf215546Sopenharmony_ci A6XX_SP_FS_PREFETCH_CMD_SAMP_ID(prefetch->samp_id) | 1428bf215546Sopenharmony_ci A6XX_SP_FS_PREFETCH_CMD_TEX_ID(prefetch->tex_id) | 1429bf215546Sopenharmony_ci A6XX_SP_FS_PREFETCH_CMD_DST(prefetch->dst) | 1430bf215546Sopenharmony_ci A6XX_SP_FS_PREFETCH_CMD_WRMASK(prefetch->wrmask) | 1431bf215546Sopenharmony_ci COND(prefetch->half_precision, A6XX_SP_FS_PREFETCH_CMD_HALF) | 1432bf215546Sopenharmony_ci A6XX_SP_FS_PREFETCH_CMD_CMD(prefetch->cmd)); 1433bf215546Sopenharmony_ci } 1434bf215546Sopenharmony_ci 1435bf215546Sopenharmony_ci if (fs->num_sampler_prefetch > 0) { 1436bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_BINDLESS_PREFETCH_CMD(0), fs->num_sampler_prefetch); 1437bf215546Sopenharmony_ci for (int i = 0; i < fs->num_sampler_prefetch; 
i++) { 1438bf215546Sopenharmony_ci const struct ir3_sampler_prefetch *prefetch = &fs->sampler_prefetch[i]; 1439bf215546Sopenharmony_ci tu_cs_emit(cs, 1440bf215546Sopenharmony_ci A6XX_SP_FS_BINDLESS_PREFETCH_CMD_SAMP_ID(prefetch->samp_bindless_id) | 1441bf215546Sopenharmony_ci A6XX_SP_FS_BINDLESS_PREFETCH_CMD_TEX_ID(prefetch->tex_bindless_id)); 1442bf215546Sopenharmony_ci } 1443bf215546Sopenharmony_ci } 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_CONTROL_1_REG, 5); 1446bf215546Sopenharmony_ci tu_cs_emit(cs, 0x7); 1447bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_HLSQ_CONTROL_2_REG_FACEREGID(face_regid) | 1448bf215546Sopenharmony_ci A6XX_HLSQ_CONTROL_2_REG_SAMPLEID(samp_id_regid) | 1449bf215546Sopenharmony_ci A6XX_HLSQ_CONTROL_2_REG_SAMPLEMASK(smask_in_regid) | 1450bf215546Sopenharmony_ci A6XX_HLSQ_CONTROL_2_REG_CENTERRHW(ij_regid[IJ_PERSP_CENTER_RHW])); 1451bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_PIXEL(ij_regid[IJ_PERSP_PIXEL]) | 1452bf215546Sopenharmony_ci A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_PIXEL(ij_regid[IJ_LINEAR_PIXEL]) | 1453bf215546Sopenharmony_ci A6XX_HLSQ_CONTROL_3_REG_IJ_PERSP_CENTROID(ij_regid[IJ_PERSP_CENTROID]) | 1454bf215546Sopenharmony_ci A6XX_HLSQ_CONTROL_3_REG_IJ_LINEAR_CENTROID(ij_regid[IJ_LINEAR_CENTROID])); 1455bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_HLSQ_CONTROL_4_REG_XYCOORDREGID(coord_regid) | 1456bf215546Sopenharmony_ci A6XX_HLSQ_CONTROL_4_REG_ZWCOORDREGID(zwcoord_regid) | 1457bf215546Sopenharmony_ci A6XX_HLSQ_CONTROL_4_REG_IJ_PERSP_SAMPLE(ij_regid[IJ_PERSP_SAMPLE]) | 1458bf215546Sopenharmony_ci A6XX_HLSQ_CONTROL_4_REG_IJ_LINEAR_SAMPLE(ij_regid[IJ_LINEAR_SAMPLE])); 1459bf215546Sopenharmony_ci tu_cs_emit(cs, 0xfcfc); 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_ci enum a6xx_threadsize thrsz = fs->info.double_threadsize ? 
THREAD128 : THREAD64; 1462bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_HLSQ_FS_CNTL_0, 1); 1463bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_HLSQ_FS_CNTL_0_THREADSIZE(thrsz) | 1464bf215546Sopenharmony_ci COND(enable_varyings, A6XX_HLSQ_FS_CNTL_0_VARYINGS)); 1465bf215546Sopenharmony_ci 1466bf215546Sopenharmony_ci bool need_size = fs->frag_face || fs->fragcoord_compmask != 0; 1467bf215546Sopenharmony_ci bool need_size_persamp = false; 1468bf215546Sopenharmony_ci if (VALIDREG(ij_regid[IJ_PERSP_CENTER_RHW])) { 1469bf215546Sopenharmony_ci if (sample_shading) 1470bf215546Sopenharmony_ci need_size_persamp = true; 1471bf215546Sopenharmony_ci else 1472bf215546Sopenharmony_ci need_size = true; 1473bf215546Sopenharmony_ci } 1474bf215546Sopenharmony_ci 1475bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CNTL, 1); 1476bf215546Sopenharmony_ci tu_cs_emit(cs, 1477bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_GRAS_CNTL_IJ_PERSP_PIXEL) | 1478bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_GRAS_CNTL_IJ_PERSP_CENTROID) | 1479bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_GRAS_CNTL_IJ_PERSP_SAMPLE) | 1480bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_GRAS_CNTL_IJ_LINEAR_PIXEL) | 1481bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_GRAS_CNTL_IJ_LINEAR_CENTROID) | 1482bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_GRAS_CNTL_IJ_LINEAR_SAMPLE) | 1483bf215546Sopenharmony_ci COND(need_size, A6XX_GRAS_CNTL_IJ_LINEAR_PIXEL) | 1484bf215546Sopenharmony_ci COND(need_size_persamp, A6XX_GRAS_CNTL_IJ_LINEAR_SAMPLE) | 1485bf215546Sopenharmony_ci COND(fs->fragcoord_compmask != 0, A6XX_GRAS_CNTL_COORD_MASK(fs->fragcoord_compmask))); 1486bf215546Sopenharmony_ci 1487bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_RENDER_CONTROL0, 2); 1488bf215546Sopenharmony_ci tu_cs_emit(cs, 1489bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_PIXEL], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_PIXEL) | 
1490bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_CENTROID], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_CENTROID) | 1491bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_SAMPLE], A6XX_RB_RENDER_CONTROL0_IJ_PERSP_SAMPLE) | 1492bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_LINEAR_PIXEL], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_PIXEL) | 1493bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_LINEAR_CENTROID], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_CENTROID) | 1494bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_LINEAR_SAMPLE], A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_SAMPLE) | 1495bf215546Sopenharmony_ci COND(need_size, A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_PIXEL) | 1496bf215546Sopenharmony_ci COND(enable_varyings, A6XX_RB_RENDER_CONTROL0_UNK10) | 1497bf215546Sopenharmony_ci COND(need_size_persamp, A6XX_RB_RENDER_CONTROL0_IJ_LINEAR_SAMPLE) | 1498bf215546Sopenharmony_ci COND(fs->fragcoord_compmask != 0, 1499bf215546Sopenharmony_ci A6XX_RB_RENDER_CONTROL0_COORD_MASK(fs->fragcoord_compmask))); 1500bf215546Sopenharmony_ci tu_cs_emit(cs, 1501bf215546Sopenharmony_ci A6XX_RB_RENDER_CONTROL1_FRAGCOORDSAMPLEMODE( 1502bf215546Sopenharmony_ci sample_shading ? 
FRAGCOORD_SAMPLE : FRAGCOORD_CENTER) | 1503bf215546Sopenharmony_ci CONDREG(smask_in_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEMASK) | 1504bf215546Sopenharmony_ci CONDREG(samp_id_regid, A6XX_RB_RENDER_CONTROL1_SAMPLEID) | 1505bf215546Sopenharmony_ci CONDREG(ij_regid[IJ_PERSP_CENTER_RHW], A6XX_RB_RENDER_CONTROL1_CENTERRHW) | 1506bf215546Sopenharmony_ci COND(fs->frag_face, A6XX_RB_RENDER_CONTROL1_FACENESS)); 1507bf215546Sopenharmony_ci 1508bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CNTL, 1); 1509bf215546Sopenharmony_ci tu_cs_emit(cs, COND(sample_shading, A6XX_RB_SAMPLE_CNTL_PER_SAMP_MODE)); 1510bf215546Sopenharmony_ci 1511bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_LRZ_PS_INPUT_CNTL, 1); 1512bf215546Sopenharmony_ci tu_cs_emit(cs, CONDREG(samp_id_regid, A6XX_GRAS_LRZ_PS_INPUT_CNTL_SAMPLEID) | 1513bf215546Sopenharmony_ci A6XX_GRAS_LRZ_PS_INPUT_CNTL_FRAGCOORDSAMPLEMODE( 1514bf215546Sopenharmony_ci sample_shading ? FRAGCOORD_SAMPLE : FRAGCOORD_CENTER)); 1515bf215546Sopenharmony_ci 1516bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CNTL, 1); 1517bf215546Sopenharmony_ci tu_cs_emit(cs, COND(sample_shading, A6XX_GRAS_SAMPLE_CNTL_PER_SAMP_MODE)); 1518bf215546Sopenharmony_ci} 1519bf215546Sopenharmony_ci 1520bf215546Sopenharmony_cistatic void 1521bf215546Sopenharmony_citu6_emit_fs_outputs(struct tu_cs *cs, 1522bf215546Sopenharmony_ci const struct ir3_shader_variant *fs, 1523bf215546Sopenharmony_ci uint32_t mrt_count, bool dual_src_blend, 1524bf215546Sopenharmony_ci uint32_t render_components, 1525bf215546Sopenharmony_ci bool no_earlyz, 1526bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 1527bf215546Sopenharmony_ci{ 1528bf215546Sopenharmony_ci uint32_t smask_regid, posz_regid, stencilref_regid; 1529bf215546Sopenharmony_ci 1530bf215546Sopenharmony_ci posz_regid = ir3_find_output_regid(fs, FRAG_RESULT_DEPTH); 1531bf215546Sopenharmony_ci smask_regid = ir3_find_output_regid(fs, FRAG_RESULT_SAMPLE_MASK); 1532bf215546Sopenharmony_ci 
stencilref_regid = ir3_find_output_regid(fs, FRAG_RESULT_STENCIL); 1533bf215546Sopenharmony_ci 1534bf215546Sopenharmony_ci uint32_t fragdata_regid[8]; 1535bf215546Sopenharmony_ci if (fs->color0_mrt) { 1536bf215546Sopenharmony_ci fragdata_regid[0] = ir3_find_output_regid(fs, FRAG_RESULT_COLOR); 1537bf215546Sopenharmony_ci for (uint32_t i = 1; i < ARRAY_SIZE(fragdata_regid); i++) 1538bf215546Sopenharmony_ci fragdata_regid[i] = fragdata_regid[0]; 1539bf215546Sopenharmony_ci } else { 1540bf215546Sopenharmony_ci for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) 1541bf215546Sopenharmony_ci fragdata_regid[i] = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + i); 1542bf215546Sopenharmony_ci } 1543bf215546Sopenharmony_ci 1544bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); 1545bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(posz_regid) | 1546bf215546Sopenharmony_ci A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(smask_regid) | 1547bf215546Sopenharmony_ci A6XX_SP_FS_OUTPUT_CNTL0_STENCILREF_REGID(stencilref_regid) | 1548bf215546Sopenharmony_ci COND(dual_src_blend, A6XX_SP_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE)); 1549bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count)); 1550bf215546Sopenharmony_ci 1551bf215546Sopenharmony_ci uint32_t fs_render_components = 0; 1552bf215546Sopenharmony_ci 1553bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 8); 1554bf215546Sopenharmony_ci for (uint32_t i = 0; i < ARRAY_SIZE(fragdata_regid); i++) { 1555bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(fragdata_regid[i]) | 1556bf215546Sopenharmony_ci (COND(fragdata_regid[i] & HALF_REG_ID, 1557bf215546Sopenharmony_ci A6XX_SP_FS_OUTPUT_REG_HALF_PRECISION))); 1558bf215546Sopenharmony_ci 1559bf215546Sopenharmony_ci if (VALIDREG(fragdata_regid[i])) { 1560bf215546Sopenharmony_ci fs_render_components |= 0xf << (i * 4); 1561bf215546Sopenharmony_ci } 1562bf215546Sopenharmony_ci } 
1563bf215546Sopenharmony_ci 1564bf215546Sopenharmony_ci /* dual source blending has an extra fs output in the 2nd slot */ 1565bf215546Sopenharmony_ci if (dual_src_blend) { 1566bf215546Sopenharmony_ci fs_render_components |= 0xf << 4; 1567bf215546Sopenharmony_ci } 1568bf215546Sopenharmony_ci 1569bf215546Sopenharmony_ci /* There is no point in having component enabled which is not written 1570bf215546Sopenharmony_ci * by the shader. Per VK spec it is an UB, however a few apps depend on 1571bf215546Sopenharmony_ci * attachment not being changed if FS doesn't have corresponding output. 1572bf215546Sopenharmony_ci */ 1573bf215546Sopenharmony_ci fs_render_components &= render_components; 1574bf215546Sopenharmony_ci 1575bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 1576bf215546Sopenharmony_ci A6XX_SP_FS_RENDER_COMPONENTS(.dword = fs_render_components)); 1577bf215546Sopenharmony_ci 1578bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_FS_OUTPUT_CNTL0, 2); 1579bf215546Sopenharmony_ci tu_cs_emit(cs, COND(fs->writes_pos, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_Z) | 1580bf215546Sopenharmony_ci COND(fs->writes_smask, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_SAMPMASK) | 1581bf215546Sopenharmony_ci COND(fs->writes_stencilref, A6XX_RB_FS_OUTPUT_CNTL0_FRAG_WRITES_STENCILREF) | 1582bf215546Sopenharmony_ci COND(dual_src_blend, A6XX_RB_FS_OUTPUT_CNTL0_DUAL_COLOR_IN_ENABLE)); 1583bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_RB_FS_OUTPUT_CNTL1_MRT(mrt_count)); 1584bf215546Sopenharmony_ci 1585bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 1586bf215546Sopenharmony_ci A6XX_RB_RENDER_COMPONENTS(.dword = fs_render_components)); 1587bf215546Sopenharmony_ci 1588bf215546Sopenharmony_ci if (pipeline) { 1589bf215546Sopenharmony_ci pipeline->lrz.fs_has_kill = fs->has_kill; 1590bf215546Sopenharmony_ci pipeline->lrz.early_fragment_tests = fs->fs.early_fragment_tests; 1591bf215546Sopenharmony_ci 1592bf215546Sopenharmony_ci if (!fs->fs.early_fragment_tests && 1593bf215546Sopenharmony_ci (fs->no_earlyz || 
fs->has_kill || fs->writes_pos || fs->writes_stencilref || no_earlyz || fs->writes_smask)) { 1594bf215546Sopenharmony_ci pipeline->lrz.force_late_z = true; 1595bf215546Sopenharmony_ci } 1596bf215546Sopenharmony_ci } 1597bf215546Sopenharmony_ci} 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_cistatic void 1600bf215546Sopenharmony_citu6_emit_geom_tess_consts(struct tu_cs *cs, 1601bf215546Sopenharmony_ci const struct ir3_shader_variant *vs, 1602bf215546Sopenharmony_ci const struct ir3_shader_variant *hs, 1603bf215546Sopenharmony_ci const struct ir3_shader_variant *ds, 1604bf215546Sopenharmony_ci const struct ir3_shader_variant *gs, 1605bf215546Sopenharmony_ci uint32_t cps_per_patch) 1606bf215546Sopenharmony_ci{ 1607bf215546Sopenharmony_ci struct tu_device *dev = cs->device; 1608bf215546Sopenharmony_ci 1609bf215546Sopenharmony_ci uint32_t num_vertices = 1610bf215546Sopenharmony_ci hs ? cps_per_patch : gs->gs.vertices_in; 1611bf215546Sopenharmony_ci 1612bf215546Sopenharmony_ci uint32_t vs_params[4] = { 1613bf215546Sopenharmony_ci vs->output_size * num_vertices * 4, /* vs primitive stride */ 1614bf215546Sopenharmony_ci vs->output_size * 4, /* vs vertex stride */ 1615bf215546Sopenharmony_ci 0, 1616bf215546Sopenharmony_ci 0, 1617bf215546Sopenharmony_ci }; 1618bf215546Sopenharmony_ci uint32_t vs_base = ir3_const_state(vs)->offsets.primitive_param; 1619bf215546Sopenharmony_ci tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, vs_base, SB6_VS_SHADER, 0, 1620bf215546Sopenharmony_ci ARRAY_SIZE(vs_params), vs_params); 1621bf215546Sopenharmony_ci 1622bf215546Sopenharmony_ci if (hs) { 1623bf215546Sopenharmony_ci assert(ds->type != MESA_SHADER_NONE); 1624bf215546Sopenharmony_ci 1625bf215546Sopenharmony_ci /* Create the shared tess factor BO the first time tess is used on the device. 
*/ 1626bf215546Sopenharmony_ci mtx_lock(&dev->mutex); 1627bf215546Sopenharmony_ci if (!dev->tess_bo) 1628bf215546Sopenharmony_ci tu_bo_init_new(dev, &dev->tess_bo, TU_TESS_BO_SIZE, TU_BO_ALLOC_NO_FLAGS); 1629bf215546Sopenharmony_ci mtx_unlock(&dev->mutex); 1630bf215546Sopenharmony_ci 1631bf215546Sopenharmony_ci uint64_t tess_factor_iova = dev->tess_bo->iova; 1632bf215546Sopenharmony_ci uint64_t tess_param_iova = tess_factor_iova + TU_TESS_FACTOR_SIZE; 1633bf215546Sopenharmony_ci 1634bf215546Sopenharmony_ci uint32_t hs_params[8] = { 1635bf215546Sopenharmony_ci vs->output_size * num_vertices * 4, /* hs primitive stride */ 1636bf215546Sopenharmony_ci vs->output_size * 4, /* hs vertex stride */ 1637bf215546Sopenharmony_ci hs->output_size, 1638bf215546Sopenharmony_ci cps_per_patch, 1639bf215546Sopenharmony_ci tess_param_iova, 1640bf215546Sopenharmony_ci tess_param_iova >> 32, 1641bf215546Sopenharmony_ci tess_factor_iova, 1642bf215546Sopenharmony_ci tess_factor_iova >> 32, 1643bf215546Sopenharmony_ci }; 1644bf215546Sopenharmony_ci 1645bf215546Sopenharmony_ci uint32_t hs_base = hs->const_state->offsets.primitive_param; 1646bf215546Sopenharmony_ci uint32_t hs_param_dwords = MIN2((hs->constlen - hs_base) * 4, ARRAY_SIZE(hs_params)); 1647bf215546Sopenharmony_ci tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, hs_base, SB6_HS_SHADER, 0, 1648bf215546Sopenharmony_ci hs_param_dwords, hs_params); 1649bf215546Sopenharmony_ci if (gs) 1650bf215546Sopenharmony_ci num_vertices = gs->gs.vertices_in; 1651bf215546Sopenharmony_ci 1652bf215546Sopenharmony_ci uint32_t ds_params[8] = { 1653bf215546Sopenharmony_ci ds->output_size * num_vertices * 4, /* ds primitive stride */ 1654bf215546Sopenharmony_ci ds->output_size * 4, /* ds vertex stride */ 1655bf215546Sopenharmony_ci hs->output_size, /* hs vertex stride (dwords) */ 1656bf215546Sopenharmony_ci hs->tess.tcs_vertices_out, 1657bf215546Sopenharmony_ci tess_param_iova, 1658bf215546Sopenharmony_ci tess_param_iova >> 32, 1659bf215546Sopenharmony_ci 
tess_factor_iova, 1660bf215546Sopenharmony_ci tess_factor_iova >> 32, 1661bf215546Sopenharmony_ci }; 1662bf215546Sopenharmony_ci 1663bf215546Sopenharmony_ci uint32_t ds_base = ds->const_state->offsets.primitive_param; 1664bf215546Sopenharmony_ci uint32_t ds_param_dwords = MIN2((ds->constlen - ds_base) * 4, ARRAY_SIZE(ds_params)); 1665bf215546Sopenharmony_ci tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, ds_base, SB6_DS_SHADER, 0, 1666bf215546Sopenharmony_ci ds_param_dwords, ds_params); 1667bf215546Sopenharmony_ci } 1668bf215546Sopenharmony_ci 1669bf215546Sopenharmony_ci if (gs) { 1670bf215546Sopenharmony_ci const struct ir3_shader_variant *prev = ds ? ds : vs; 1671bf215546Sopenharmony_ci uint32_t gs_params[4] = { 1672bf215546Sopenharmony_ci prev->output_size * num_vertices * 4, /* gs primitive stride */ 1673bf215546Sopenharmony_ci prev->output_size * 4, /* gs vertex stride */ 1674bf215546Sopenharmony_ci 0, 1675bf215546Sopenharmony_ci 0, 1676bf215546Sopenharmony_ci }; 1677bf215546Sopenharmony_ci uint32_t gs_base = gs->const_state->offsets.primitive_param; 1678bf215546Sopenharmony_ci tu6_emit_const(cs, CP_LOAD_STATE6_GEOM, gs_base, SB6_GS_SHADER, 0, 1679bf215546Sopenharmony_ci ARRAY_SIZE(gs_params), gs_params); 1680bf215546Sopenharmony_ci } 1681bf215546Sopenharmony_ci} 1682bf215546Sopenharmony_ci 1683bf215546Sopenharmony_cistatic void 1684bf215546Sopenharmony_citu6_emit_program_config(struct tu_cs *cs, 1685bf215546Sopenharmony_ci struct tu_pipeline_builder *builder) 1686bf215546Sopenharmony_ci{ 1687bf215546Sopenharmony_ci gl_shader_stage stage = MESA_SHADER_VERTEX; 1688bf215546Sopenharmony_ci 1689bf215546Sopenharmony_ci STATIC_ASSERT(MESA_SHADER_VERTEX == 0); 1690bf215546Sopenharmony_ci 1691bf215546Sopenharmony_ci bool shared_consts_enable = tu6_shared_constants_enable(builder->layout, 1692bf215546Sopenharmony_ci builder->device->compiler); 1693bf215546Sopenharmony_ci tu6_emit_shared_consts_enable(cs, shared_consts_enable); 1694bf215546Sopenharmony_ci 
1695bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( 1696bf215546Sopenharmony_ci .vs_state = true, 1697bf215546Sopenharmony_ci .hs_state = true, 1698bf215546Sopenharmony_ci .ds_state = true, 1699bf215546Sopenharmony_ci .gs_state = true, 1700bf215546Sopenharmony_ci .fs_state = true, 1701bf215546Sopenharmony_ci .gfx_ibo = true, 1702bf215546Sopenharmony_ci .gfx_shared_const = shared_consts_enable)); 1703bf215546Sopenharmony_ci for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) { 1704bf215546Sopenharmony_ci tu6_emit_xs_config(cs, stage, builder->shaders->variants[stage]); 1705bf215546Sopenharmony_ci } 1706bf215546Sopenharmony_ci} 1707bf215546Sopenharmony_ci 1708bf215546Sopenharmony_cistatic void 1709bf215546Sopenharmony_citu6_emit_program(struct tu_cs *cs, 1710bf215546Sopenharmony_ci struct tu_pipeline_builder *builder, 1711bf215546Sopenharmony_ci bool binning_pass, 1712bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 1713bf215546Sopenharmony_ci{ 1714bf215546Sopenharmony_ci const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX]; 1715bf215546Sopenharmony_ci const struct ir3_shader_variant *bs = builder->binning_variant; 1716bf215546Sopenharmony_ci const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL]; 1717bf215546Sopenharmony_ci const struct ir3_shader_variant *ds = builder->shaders->variants[MESA_SHADER_TESS_EVAL]; 1718bf215546Sopenharmony_ci const struct ir3_shader_variant *gs = builder->shaders->variants[MESA_SHADER_GEOMETRY]; 1719bf215546Sopenharmony_ci const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT]; 1720bf215546Sopenharmony_ci gl_shader_stage stage = MESA_SHADER_VERTEX; 1721bf215546Sopenharmony_ci uint32_t cps_per_patch = builder->create_info->pTessellationState ? 
1722bf215546Sopenharmony_ci builder->create_info->pTessellationState->patchControlPoints : 0; 1723bf215546Sopenharmony_ci bool multi_pos_output = builder->shaders->multi_pos_output; 1724bf215546Sopenharmony_ci 1725bf215546Sopenharmony_ci /* Don't use the binning pass variant when GS is present because we don't 1726bf215546Sopenharmony_ci * support compiling correct binning pass variants with GS. 1727bf215546Sopenharmony_ci */ 1728bf215546Sopenharmony_ci if (binning_pass && !gs) { 1729bf215546Sopenharmony_ci vs = bs; 1730bf215546Sopenharmony_ci tu6_emit_xs(cs, stage, bs, &builder->pvtmem, builder->binning_vs_iova); 1731bf215546Sopenharmony_ci stage++; 1732bf215546Sopenharmony_ci } 1733bf215546Sopenharmony_ci 1734bf215546Sopenharmony_ci for (; stage < ARRAY_SIZE(builder->shader_iova); stage++) { 1735bf215546Sopenharmony_ci const struct ir3_shader_variant *xs = builder->shaders->variants[stage]; 1736bf215546Sopenharmony_ci 1737bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT && binning_pass) 1738bf215546Sopenharmony_ci fs = xs = NULL; 1739bf215546Sopenharmony_ci 1740bf215546Sopenharmony_ci tu6_emit_xs(cs, stage, xs, &builder->pvtmem, builder->shader_iova[stage]); 1741bf215546Sopenharmony_ci } 1742bf215546Sopenharmony_ci 1743bf215546Sopenharmony_ci uint32_t multiview_views = util_logbase2(builder->multiview_mask) + 1; 1744bf215546Sopenharmony_ci uint32_t multiview_cntl = builder->multiview_mask ? 1745bf215546Sopenharmony_ci A6XX_PC_MULTIVIEW_CNTL_ENABLE | 1746bf215546Sopenharmony_ci A6XX_PC_MULTIVIEW_CNTL_VIEWS(multiview_views) | 1747bf215546Sopenharmony_ci COND(!multi_pos_output, A6XX_PC_MULTIVIEW_CNTL_DISABLEMULTIPOS) 1748bf215546Sopenharmony_ci : 0; 1749bf215546Sopenharmony_ci 1750bf215546Sopenharmony_ci /* Copy what the blob does here. This will emit an extra 0x3f 1751bf215546Sopenharmony_ci * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what 1752bf215546Sopenharmony_ci * this is working around yet. 
1753bf215546Sopenharmony_ci */ 1754bf215546Sopenharmony_ci if (builder->device->physical_device->info->a6xx.has_cp_reg_write) { 1755bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3); 1756bf215546Sopenharmony_ci tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE)); 1757bf215546Sopenharmony_ci tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL); 1758bf215546Sopenharmony_ci } else { 1759bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_CNTL, 1); 1760bf215546Sopenharmony_ci } 1761bf215546Sopenharmony_ci tu_cs_emit(cs, multiview_cntl); 1762bf215546Sopenharmony_ci 1763bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_VFD_MULTIVIEW_CNTL, 1); 1764bf215546Sopenharmony_ci tu_cs_emit(cs, multiview_cntl); 1765bf215546Sopenharmony_ci 1766bf215546Sopenharmony_ci if (multiview_cntl && 1767bf215546Sopenharmony_ci builder->device->physical_device->info->a6xx.supports_multiview_mask) { 1768bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_PC_MULTIVIEW_MASK, 1); 1769bf215546Sopenharmony_ci tu_cs_emit(cs, builder->multiview_mask); 1770bf215546Sopenharmony_ci } 1771bf215546Sopenharmony_ci 1772bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_HS_WAVE_INPUT_SIZE, 1); 1773bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 1774bf215546Sopenharmony_ci 1775bf215546Sopenharmony_ci tu6_emit_vpc(cs, vs, hs, ds, gs, fs, cps_per_patch); 1776bf215546Sopenharmony_ci tu6_emit_vpc_varying_modes(cs, fs); 1777bf215546Sopenharmony_ci 1778bf215546Sopenharmony_ci bool no_earlyz = builder->depth_attachment_format == VK_FORMAT_S8_UINT; 1779bf215546Sopenharmony_ci uint32_t mrt_count = builder->color_attachment_count; 1780bf215546Sopenharmony_ci uint32_t render_components = builder->render_components; 1781bf215546Sopenharmony_ci 1782bf215546Sopenharmony_ci if (builder->alpha_to_coverage) { 1783bf215546Sopenharmony_ci /* alpha to coverage can behave like a discard */ 1784bf215546Sopenharmony_ci no_earlyz = true; 1785bf215546Sopenharmony_ci /* alpha value comes from first mrt */ 
1786bf215546Sopenharmony_ci render_components |= 0xf; 1787bf215546Sopenharmony_ci if (!mrt_count) { 1788bf215546Sopenharmony_ci mrt_count = 1; 1789bf215546Sopenharmony_ci /* Disable memory write for dummy mrt because it doesn't get set otherwise */ 1790bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, .component_enable = 0)); 1791bf215546Sopenharmony_ci } 1792bf215546Sopenharmony_ci } 1793bf215546Sopenharmony_ci 1794bf215546Sopenharmony_ci if (fs) { 1795bf215546Sopenharmony_ci tu6_emit_fs_inputs(cs, fs); 1796bf215546Sopenharmony_ci tu6_emit_fs_outputs(cs, fs, mrt_count, 1797bf215546Sopenharmony_ci builder->use_dual_src_blend, 1798bf215546Sopenharmony_ci render_components, 1799bf215546Sopenharmony_ci no_earlyz, 1800bf215546Sopenharmony_ci pipeline); 1801bf215546Sopenharmony_ci } else { 1802bf215546Sopenharmony_ci /* TODO: check if these can be skipped if fs is disabled */ 1803bf215546Sopenharmony_ci struct ir3_shader_variant dummy_variant = {}; 1804bf215546Sopenharmony_ci tu6_emit_fs_inputs(cs, &dummy_variant); 1805bf215546Sopenharmony_ci tu6_emit_fs_outputs(cs, &dummy_variant, mrt_count, 1806bf215546Sopenharmony_ci builder->use_dual_src_blend, 1807bf215546Sopenharmony_ci render_components, 1808bf215546Sopenharmony_ci no_earlyz, 1809bf215546Sopenharmony_ci NULL); 1810bf215546Sopenharmony_ci } 1811bf215546Sopenharmony_ci 1812bf215546Sopenharmony_ci if (gs || hs) { 1813bf215546Sopenharmony_ci tu6_emit_geom_tess_consts(cs, vs, hs, ds, gs, cps_per_patch); 1814bf215546Sopenharmony_ci } 1815bf215546Sopenharmony_ci} 1816bf215546Sopenharmony_ci 1817bf215546Sopenharmony_ci#define TU6_EMIT_VERTEX_INPUT_MAX_DWORDS (MAX_VERTEX_ATTRIBS * 5 + 4) 1818bf215546Sopenharmony_ci 1819bf215546Sopenharmony_cistatic void 1820bf215546Sopenharmony_citu6_emit_vertex_input(struct tu_pipeline *pipeline, 1821bf215546Sopenharmony_ci struct tu_draw_state *vi_state, 1822bf215546Sopenharmony_ci const struct ir3_shader_variant *vs, 1823bf215546Sopenharmony_ci const 
VkPipelineVertexInputStateCreateInfo *info) 1824bf215546Sopenharmony_ci{ 1825bf215546Sopenharmony_ci uint32_t binding_instanced = 0; /* bitmask of instanced bindings */ 1826bf215546Sopenharmony_ci uint32_t step_rate[MAX_VBS]; 1827bf215546Sopenharmony_ci 1828bf215546Sopenharmony_ci struct tu_cs cs; 1829bf215546Sopenharmony_ci tu_cs_begin_sub_stream(&pipeline->cs, 1830bf215546Sopenharmony_ci TU6_EMIT_VERTEX_INPUT_MAX_DWORDS, &cs); 1831bf215546Sopenharmony_ci 1832bf215546Sopenharmony_ci for (uint32_t i = 0; i < info->vertexBindingDescriptionCount; i++) { 1833bf215546Sopenharmony_ci const VkVertexInputBindingDescription *binding = 1834bf215546Sopenharmony_ci &info->pVertexBindingDescriptions[i]; 1835bf215546Sopenharmony_ci 1836bf215546Sopenharmony_ci if (!(pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_VB_STRIDE))) { 1837bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, 1838bf215546Sopenharmony_ci A6XX_VFD_FETCH_STRIDE(binding->binding, binding->stride)); 1839bf215546Sopenharmony_ci } 1840bf215546Sopenharmony_ci 1841bf215546Sopenharmony_ci if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) 1842bf215546Sopenharmony_ci binding_instanced |= 1 << binding->binding; 1843bf215546Sopenharmony_ci 1844bf215546Sopenharmony_ci step_rate[binding->binding] = 1; 1845bf215546Sopenharmony_ci } 1846bf215546Sopenharmony_ci 1847bf215546Sopenharmony_ci const VkPipelineVertexInputDivisorStateCreateInfoEXT *div_state = 1848bf215546Sopenharmony_ci vk_find_struct_const(info->pNext, PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT); 1849bf215546Sopenharmony_ci if (div_state) { 1850bf215546Sopenharmony_ci for (uint32_t i = 0; i < div_state->vertexBindingDivisorCount; i++) { 1851bf215546Sopenharmony_ci const VkVertexInputBindingDivisorDescriptionEXT *desc = 1852bf215546Sopenharmony_ci &div_state->pVertexBindingDivisors[i]; 1853bf215546Sopenharmony_ci step_rate[desc->binding] = desc->divisor; 1854bf215546Sopenharmony_ci } 1855bf215546Sopenharmony_ci } 1856bf215546Sopenharmony_ci 
1857bf215546Sopenharmony_ci int32_t input_for_attr[MAX_VERTEX_ATTRIBS]; 1858bf215546Sopenharmony_ci uint32_t used_attrs_count = 0; 1859bf215546Sopenharmony_ci 1860bf215546Sopenharmony_ci for (uint32_t attr_idx = 0; attr_idx < info->vertexAttributeDescriptionCount; attr_idx++) { 1861bf215546Sopenharmony_ci input_for_attr[attr_idx] = -1; 1862bf215546Sopenharmony_ci for (uint32_t input_idx = 0; input_idx < vs->inputs_count; input_idx++) { 1863bf215546Sopenharmony_ci if ((vs->inputs[input_idx].slot - VERT_ATTRIB_GENERIC0) == 1864bf215546Sopenharmony_ci info->pVertexAttributeDescriptions[attr_idx].location) { 1865bf215546Sopenharmony_ci input_for_attr[attr_idx] = input_idx; 1866bf215546Sopenharmony_ci used_attrs_count++; 1867bf215546Sopenharmony_ci break; 1868bf215546Sopenharmony_ci } 1869bf215546Sopenharmony_ci } 1870bf215546Sopenharmony_ci } 1871bf215546Sopenharmony_ci 1872bf215546Sopenharmony_ci if (used_attrs_count) 1873bf215546Sopenharmony_ci tu_cs_emit_pkt4(&cs, REG_A6XX_VFD_DECODE_INSTR(0), used_attrs_count * 2); 1874bf215546Sopenharmony_ci 1875bf215546Sopenharmony_ci for (uint32_t attr_idx = 0; attr_idx < info->vertexAttributeDescriptionCount; attr_idx++) { 1876bf215546Sopenharmony_ci const VkVertexInputAttributeDescription *attr = 1877bf215546Sopenharmony_ci &info->pVertexAttributeDescriptions[attr_idx]; 1878bf215546Sopenharmony_ci 1879bf215546Sopenharmony_ci if (input_for_attr[attr_idx] == -1) 1880bf215546Sopenharmony_ci continue; 1881bf215546Sopenharmony_ci 1882bf215546Sopenharmony_ci const struct tu_native_format format = tu6_format_vtx(attr->format); 1883bf215546Sopenharmony_ci tu_cs_emit(&cs, A6XX_VFD_DECODE_INSTR(0, 1884bf215546Sopenharmony_ci .idx = attr->binding, 1885bf215546Sopenharmony_ci .offset = attr->offset, 1886bf215546Sopenharmony_ci .instanced = binding_instanced & (1 << attr->binding), 1887bf215546Sopenharmony_ci .format = format.fmt, 1888bf215546Sopenharmony_ci .swap = format.swap, 1889bf215546Sopenharmony_ci .unk30 = 1, 
1890bf215546Sopenharmony_ci ._float = !vk_format_is_int(attr->format)).value); 1891bf215546Sopenharmony_ci tu_cs_emit(&cs, A6XX_VFD_DECODE_STEP_RATE(0, step_rate[attr->binding]).value); 1892bf215546Sopenharmony_ci } 1893bf215546Sopenharmony_ci 1894bf215546Sopenharmony_ci if (used_attrs_count) 1895bf215546Sopenharmony_ci tu_cs_emit_pkt4(&cs, REG_A6XX_VFD_DEST_CNTL_INSTR(0), used_attrs_count); 1896bf215546Sopenharmony_ci 1897bf215546Sopenharmony_ci for (uint32_t attr_idx = 0; attr_idx < info->vertexAttributeDescriptionCount; attr_idx++) { 1898bf215546Sopenharmony_ci int32_t input_idx = input_for_attr[attr_idx]; 1899bf215546Sopenharmony_ci if (input_idx == -1) 1900bf215546Sopenharmony_ci continue; 1901bf215546Sopenharmony_ci 1902bf215546Sopenharmony_ci tu_cs_emit(&cs, A6XX_VFD_DEST_CNTL_INSTR(0, 1903bf215546Sopenharmony_ci .writemask = vs->inputs[input_idx].compmask, 1904bf215546Sopenharmony_ci .regid = vs->inputs[input_idx].regid).value); 1905bf215546Sopenharmony_ci } 1906bf215546Sopenharmony_ci 1907bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, 1908bf215546Sopenharmony_ci A6XX_VFD_CONTROL_0( 1909bf215546Sopenharmony_ci .fetch_cnt = used_attrs_count, /* decode_cnt for binning pass ? 
*/ 1910bf215546Sopenharmony_ci .decode_cnt = used_attrs_count)); 1911bf215546Sopenharmony_ci 1912bf215546Sopenharmony_ci *vi_state = tu_cs_end_draw_state(&pipeline->cs, &cs); 1913bf215546Sopenharmony_ci} 1914bf215546Sopenharmony_ci 1915bf215546Sopenharmony_civoid 1916bf215546Sopenharmony_citu6_emit_viewport(struct tu_cs *cs, const VkViewport *viewports, uint32_t num_viewport, 1917bf215546Sopenharmony_ci bool z_negative_one_to_one) 1918bf215546Sopenharmony_ci{ 1919bf215546Sopenharmony_ci VkExtent2D guardband = {511, 511}; 1920bf215546Sopenharmony_ci 1921bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_VPORT_XOFFSET(0), num_viewport * 6); 1922bf215546Sopenharmony_ci for (uint32_t i = 0; i < num_viewport; i++) { 1923bf215546Sopenharmony_ci const VkViewport *viewport = &viewports[i]; 1924bf215546Sopenharmony_ci float offsets[3]; 1925bf215546Sopenharmony_ci float scales[3]; 1926bf215546Sopenharmony_ci scales[0] = viewport->width / 2.0f; 1927bf215546Sopenharmony_ci scales[1] = viewport->height / 2.0f; 1928bf215546Sopenharmony_ci if (z_negative_one_to_one) { 1929bf215546Sopenharmony_ci scales[2] = 0.5 * (viewport->maxDepth - viewport->minDepth); 1930bf215546Sopenharmony_ci } else { 1931bf215546Sopenharmony_ci scales[2] = viewport->maxDepth - viewport->minDepth; 1932bf215546Sopenharmony_ci } 1933bf215546Sopenharmony_ci 1934bf215546Sopenharmony_ci offsets[0] = viewport->x + scales[0]; 1935bf215546Sopenharmony_ci offsets[1] = viewport->y + scales[1]; 1936bf215546Sopenharmony_ci if (z_negative_one_to_one) { 1937bf215546Sopenharmony_ci offsets[2] = 0.5 * (viewport->minDepth + viewport->maxDepth); 1938bf215546Sopenharmony_ci } else { 1939bf215546Sopenharmony_ci offsets[2] = viewport->minDepth; 1940bf215546Sopenharmony_ci } 1941bf215546Sopenharmony_ci 1942bf215546Sopenharmony_ci for (uint32_t j = 0; j < 3; j++) { 1943bf215546Sopenharmony_ci tu_cs_emit(cs, fui(offsets[j])); 1944bf215546Sopenharmony_ci tu_cs_emit(cs, fui(scales[j])); 1945bf215546Sopenharmony_ci } 
1946bf215546Sopenharmony_ci 1947bf215546Sopenharmony_ci guardband.width = 1948bf215546Sopenharmony_ci MIN2(guardband.width, fd_calc_guardband(offsets[0], scales[0], false)); 1949bf215546Sopenharmony_ci guardband.height = 1950bf215546Sopenharmony_ci MIN2(guardband.height, fd_calc_guardband(offsets[1], scales[1], false)); 1951bf215546Sopenharmony_ci } 1952bf215546Sopenharmony_ci 1953bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0), num_viewport * 2); 1954bf215546Sopenharmony_ci for (uint32_t i = 0; i < num_viewport; i++) { 1955bf215546Sopenharmony_ci const VkViewport *viewport = &viewports[i]; 1956bf215546Sopenharmony_ci VkOffset2D min; 1957bf215546Sopenharmony_ci VkOffset2D max; 1958bf215546Sopenharmony_ci min.x = (int32_t) viewport->x; 1959bf215546Sopenharmony_ci max.x = (int32_t) ceilf(viewport->x + viewport->width); 1960bf215546Sopenharmony_ci if (viewport->height >= 0.0f) { 1961bf215546Sopenharmony_ci min.y = (int32_t) viewport->y; 1962bf215546Sopenharmony_ci max.y = (int32_t) ceilf(viewport->y + viewport->height); 1963bf215546Sopenharmony_ci } else { 1964bf215546Sopenharmony_ci min.y = (int32_t)(viewport->y + viewport->height); 1965bf215546Sopenharmony_ci max.y = (int32_t) ceilf(viewport->y); 1966bf215546Sopenharmony_ci } 1967bf215546Sopenharmony_ci /* the spec allows viewport->height to be 0.0f */ 1968bf215546Sopenharmony_ci if (min.y == max.y) 1969bf215546Sopenharmony_ci max.y++; 1970bf215546Sopenharmony_ci /* allow viewport->width = 0.0f for un-initialized viewports: */ 1971bf215546Sopenharmony_ci if (min.x == max.x) 1972bf215546Sopenharmony_ci max.x++; 1973bf215546Sopenharmony_ci 1974bf215546Sopenharmony_ci min.x = MAX2(min.x, 0); 1975bf215546Sopenharmony_ci min.y = MAX2(min.y, 0); 1976bf215546Sopenharmony_ci max.x = MAX2(max.x, 1); 1977bf215546Sopenharmony_ci max.y = MAX2(max.y, 1); 1978bf215546Sopenharmony_ci 1979bf215546Sopenharmony_ci assert(min.x < max.x); 1980bf215546Sopenharmony_ci assert(min.y < max.y); 
1981bf215546Sopenharmony_ci 1982bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_X(min.x) | 1983bf215546Sopenharmony_ci A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL_Y(min.y)); 1984bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_X(max.x - 1) | 1985bf215546Sopenharmony_ci A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR_Y(max.y - 1)); 1986bf215546Sopenharmony_ci } 1987bf215546Sopenharmony_ci 1988bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_Z_CLAMP(0), num_viewport * 2); 1989bf215546Sopenharmony_ci for (uint32_t i = 0; i < num_viewport; i++) { 1990bf215546Sopenharmony_ci const VkViewport *viewport = &viewports[i]; 1991bf215546Sopenharmony_ci tu_cs_emit(cs, fui(MIN2(viewport->minDepth, viewport->maxDepth))); 1992bf215546Sopenharmony_ci tu_cs_emit(cs, fui(MAX2(viewport->minDepth, viewport->maxDepth))); 1993bf215546Sopenharmony_ci } 1994bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ, 1); 1995bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_HORZ(guardband.width) | 1996bf215546Sopenharmony_ci A6XX_GRAS_CL_GUARDBAND_CLIP_ADJ_VERT(guardband.height)); 1997bf215546Sopenharmony_ci 1998bf215546Sopenharmony_ci /* TODO: what to do about this and multi viewport ? */ 1999bf215546Sopenharmony_ci float z_clamp_min = num_viewport ? MIN2(viewports[0].minDepth, viewports[0].maxDepth) : 0; 2000bf215546Sopenharmony_ci float z_clamp_max = num_viewport ? 
MAX2(viewports[0].minDepth, viewports[0].maxDepth) : 0; 2001bf215546Sopenharmony_ci 2002bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 2003bf215546Sopenharmony_ci A6XX_RB_Z_CLAMP_MIN(z_clamp_min), 2004bf215546Sopenharmony_ci A6XX_RB_Z_CLAMP_MAX(z_clamp_max)); 2005bf215546Sopenharmony_ci} 2006bf215546Sopenharmony_ci 2007bf215546Sopenharmony_civoid 2008bf215546Sopenharmony_citu6_emit_scissor(struct tu_cs *cs, const VkRect2D *scissors, uint32_t scissor_count) 2009bf215546Sopenharmony_ci{ 2010bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0), scissor_count * 2); 2011bf215546Sopenharmony_ci 2012bf215546Sopenharmony_ci for (uint32_t i = 0; i < scissor_count; i++) { 2013bf215546Sopenharmony_ci const VkRect2D *scissor = &scissors[i]; 2014bf215546Sopenharmony_ci 2015bf215546Sopenharmony_ci uint32_t min_x = scissor->offset.x; 2016bf215546Sopenharmony_ci uint32_t min_y = scissor->offset.y; 2017bf215546Sopenharmony_ci uint32_t max_x = min_x + scissor->extent.width - 1; 2018bf215546Sopenharmony_ci uint32_t max_y = min_y + scissor->extent.height - 1; 2019bf215546Sopenharmony_ci 2020bf215546Sopenharmony_ci if (!scissor->extent.width || !scissor->extent.height) { 2021bf215546Sopenharmony_ci min_x = min_y = 1; 2022bf215546Sopenharmony_ci max_x = max_y = 0; 2023bf215546Sopenharmony_ci } else { 2024bf215546Sopenharmony_ci /* avoid overflow */ 2025bf215546Sopenharmony_ci uint32_t scissor_max = BITFIELD_MASK(15); 2026bf215546Sopenharmony_ci min_x = MIN2(scissor_max, min_x); 2027bf215546Sopenharmony_ci min_y = MIN2(scissor_max, min_y); 2028bf215546Sopenharmony_ci max_x = MIN2(scissor_max, max_x); 2029bf215546Sopenharmony_ci max_y = MIN2(scissor_max, max_y); 2030bf215546Sopenharmony_ci } 2031bf215546Sopenharmony_ci 2032bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_TL_X(min_x) | 2033bf215546Sopenharmony_ci A6XX_GRAS_SC_SCREEN_SCISSOR_TL_Y(min_y)); 2034bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_GRAS_SC_SCREEN_SCISSOR_BR_X(max_x) | 
2035bf215546Sopenharmony_ci A6XX_GRAS_SC_SCREEN_SCISSOR_BR_Y(max_y)); 2036bf215546Sopenharmony_ci } 2037bf215546Sopenharmony_ci} 2038bf215546Sopenharmony_ci 2039bf215546Sopenharmony_civoid 2040bf215546Sopenharmony_citu6_emit_sample_locations(struct tu_cs *cs, const VkSampleLocationsInfoEXT *samp_loc) 2041bf215546Sopenharmony_ci{ 2042bf215546Sopenharmony_ci if (!samp_loc) { 2043bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 1); 2044bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 2045bf215546Sopenharmony_ci 2046bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 1); 2047bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 2048bf215546Sopenharmony_ci 2049bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 1); 2050bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 2051bf215546Sopenharmony_ci return; 2052bf215546Sopenharmony_ci } 2053bf215546Sopenharmony_ci 2054bf215546Sopenharmony_ci assert(samp_loc->sampleLocationsPerPixel == samp_loc->sampleLocationsCount); 2055bf215546Sopenharmony_ci assert(samp_loc->sampleLocationGridSize.width == 1); 2056bf215546Sopenharmony_ci assert(samp_loc->sampleLocationGridSize.height == 1); 2057bf215546Sopenharmony_ci 2058bf215546Sopenharmony_ci uint32_t sample_config = 2059bf215546Sopenharmony_ci A6XX_RB_SAMPLE_CONFIG_LOCATION_ENABLE; 2060bf215546Sopenharmony_ci uint32_t sample_locations = 0; 2061bf215546Sopenharmony_ci for (uint32_t i = 0; i < samp_loc->sampleLocationsCount; i++) { 2062bf215546Sopenharmony_ci sample_locations |= 2063bf215546Sopenharmony_ci (A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_X(samp_loc->pSampleLocations[i].x) | 2064bf215546Sopenharmony_ci A6XX_RB_SAMPLE_LOCATION_0_SAMPLE_0_Y(samp_loc->pSampleLocations[i].y)) << i*8; 2065bf215546Sopenharmony_ci } 2066bf215546Sopenharmony_ci 2067bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SAMPLE_CONFIG, 2); 2068bf215546Sopenharmony_ci tu_cs_emit(cs, sample_config); 2069bf215546Sopenharmony_ci tu_cs_emit(cs, sample_locations); 
2070bf215546Sopenharmony_ci 2071bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_SAMPLE_CONFIG, 2); 2072bf215546Sopenharmony_ci tu_cs_emit(cs, sample_config); 2073bf215546Sopenharmony_ci tu_cs_emit(cs, sample_locations); 2074bf215546Sopenharmony_ci 2075bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_TP_SAMPLE_CONFIG, 2); 2076bf215546Sopenharmony_ci tu_cs_emit(cs, sample_config); 2077bf215546Sopenharmony_ci tu_cs_emit(cs, sample_locations); 2078bf215546Sopenharmony_ci} 2079bf215546Sopenharmony_ci 2080bf215546Sopenharmony_cistatic uint32_t 2081bf215546Sopenharmony_citu6_gras_su_cntl(const VkPipelineRasterizationStateCreateInfo *rast_info, 2082bf215546Sopenharmony_ci enum a5xx_line_mode line_mode, 2083bf215546Sopenharmony_ci bool multiview) 2084bf215546Sopenharmony_ci{ 2085bf215546Sopenharmony_ci uint32_t gras_su_cntl = 0; 2086bf215546Sopenharmony_ci 2087bf215546Sopenharmony_ci if (rast_info->cullMode & VK_CULL_MODE_FRONT_BIT) 2088bf215546Sopenharmony_ci gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_FRONT; 2089bf215546Sopenharmony_ci if (rast_info->cullMode & VK_CULL_MODE_BACK_BIT) 2090bf215546Sopenharmony_ci gras_su_cntl |= A6XX_GRAS_SU_CNTL_CULL_BACK; 2091bf215546Sopenharmony_ci 2092bf215546Sopenharmony_ci if (rast_info->frontFace == VK_FRONT_FACE_CLOCKWISE) 2093bf215546Sopenharmony_ci gras_su_cntl |= A6XX_GRAS_SU_CNTL_FRONT_CW; 2094bf215546Sopenharmony_ci 2095bf215546Sopenharmony_ci gras_su_cntl |= 2096bf215546Sopenharmony_ci A6XX_GRAS_SU_CNTL_LINEHALFWIDTH(rast_info->lineWidth / 2.0f); 2097bf215546Sopenharmony_ci 2098bf215546Sopenharmony_ci if (rast_info->depthBiasEnable) 2099bf215546Sopenharmony_ci gras_su_cntl |= A6XX_GRAS_SU_CNTL_POLY_OFFSET; 2100bf215546Sopenharmony_ci 2101bf215546Sopenharmony_ci gras_su_cntl |= A6XX_GRAS_SU_CNTL_LINE_MODE(line_mode); 2102bf215546Sopenharmony_ci 2103bf215546Sopenharmony_ci if (multiview) { 2104bf215546Sopenharmony_ci gras_su_cntl |= 2105bf215546Sopenharmony_ci A6XX_GRAS_SU_CNTL_UNK17 | 2106bf215546Sopenharmony_ci 
A6XX_GRAS_SU_CNTL_MULTIVIEW_ENABLE; 2107bf215546Sopenharmony_ci } 2108bf215546Sopenharmony_ci 2109bf215546Sopenharmony_ci return gras_su_cntl; 2110bf215546Sopenharmony_ci} 2111bf215546Sopenharmony_ci 2112bf215546Sopenharmony_civoid 2113bf215546Sopenharmony_citu6_emit_depth_bias(struct tu_cs *cs, 2114bf215546Sopenharmony_ci float constant_factor, 2115bf215546Sopenharmony_ci float clamp, 2116bf215546Sopenharmony_ci float slope_factor) 2117bf215546Sopenharmony_ci{ 2118bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_SU_POLY_OFFSET_SCALE, 3); 2119bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_SCALE(slope_factor).value); 2120bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET(constant_factor).value); 2121bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP(clamp).value); 2122bf215546Sopenharmony_ci} 2123bf215546Sopenharmony_ci 2124bf215546Sopenharmony_cistatic uint32_t 2125bf215546Sopenharmony_citu6_rb_mrt_blend_control(const VkPipelineColorBlendAttachmentState *att, 2126bf215546Sopenharmony_ci bool has_alpha) 2127bf215546Sopenharmony_ci{ 2128bf215546Sopenharmony_ci const enum a3xx_rb_blend_opcode color_op = tu6_blend_op(att->colorBlendOp); 2129bf215546Sopenharmony_ci const enum adreno_rb_blend_factor src_color_factor = tu6_blend_factor( 2130bf215546Sopenharmony_ci has_alpha ? att->srcColorBlendFactor 2131bf215546Sopenharmony_ci : tu_blend_factor_no_dst_alpha(att->srcColorBlendFactor)); 2132bf215546Sopenharmony_ci const enum adreno_rb_blend_factor dst_color_factor = tu6_blend_factor( 2133bf215546Sopenharmony_ci has_alpha ? 
att->dstColorBlendFactor 2134bf215546Sopenharmony_ci : tu_blend_factor_no_dst_alpha(att->dstColorBlendFactor)); 2135bf215546Sopenharmony_ci const enum a3xx_rb_blend_opcode alpha_op = tu6_blend_op(att->alphaBlendOp); 2136bf215546Sopenharmony_ci const enum adreno_rb_blend_factor src_alpha_factor = 2137bf215546Sopenharmony_ci tu6_blend_factor(att->srcAlphaBlendFactor); 2138bf215546Sopenharmony_ci const enum adreno_rb_blend_factor dst_alpha_factor = 2139bf215546Sopenharmony_ci tu6_blend_factor(att->dstAlphaBlendFactor); 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_ci return A6XX_RB_MRT_BLEND_CONTROL_RGB_SRC_FACTOR(src_color_factor) | 2142bf215546Sopenharmony_ci A6XX_RB_MRT_BLEND_CONTROL_RGB_BLEND_OPCODE(color_op) | 2143bf215546Sopenharmony_ci A6XX_RB_MRT_BLEND_CONTROL_RGB_DEST_FACTOR(dst_color_factor) | 2144bf215546Sopenharmony_ci A6XX_RB_MRT_BLEND_CONTROL_ALPHA_SRC_FACTOR(src_alpha_factor) | 2145bf215546Sopenharmony_ci A6XX_RB_MRT_BLEND_CONTROL_ALPHA_BLEND_OPCODE(alpha_op) | 2146bf215546Sopenharmony_ci A6XX_RB_MRT_BLEND_CONTROL_ALPHA_DEST_FACTOR(dst_alpha_factor); 2147bf215546Sopenharmony_ci} 2148bf215546Sopenharmony_ci 2149bf215546Sopenharmony_cistatic uint32_t 2150bf215546Sopenharmony_citu6_rb_mrt_control(const VkPipelineColorBlendAttachmentState *att, 2151bf215546Sopenharmony_ci uint32_t rb_mrt_control_rop, 2152bf215546Sopenharmony_ci bool has_alpha) 2153bf215546Sopenharmony_ci{ 2154bf215546Sopenharmony_ci uint32_t rb_mrt_control = 2155bf215546Sopenharmony_ci A6XX_RB_MRT_CONTROL_COMPONENT_ENABLE(att->colorWriteMask); 2156bf215546Sopenharmony_ci 2157bf215546Sopenharmony_ci rb_mrt_control |= rb_mrt_control_rop; 2158bf215546Sopenharmony_ci 2159bf215546Sopenharmony_ci if (att->blendEnable) { 2160bf215546Sopenharmony_ci rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND; 2161bf215546Sopenharmony_ci 2162bf215546Sopenharmony_ci if (has_alpha) 2163bf215546Sopenharmony_ci rb_mrt_control |= A6XX_RB_MRT_CONTROL_BLEND2; 2164bf215546Sopenharmony_ci } 
2165bf215546Sopenharmony_ci 2166bf215546Sopenharmony_ci return rb_mrt_control; 2167bf215546Sopenharmony_ci} 2168bf215546Sopenharmony_ci 2169bf215546Sopenharmony_ciuint32_t 2170bf215546Sopenharmony_citu6_rb_mrt_control_rop(VkLogicOp op, bool *rop_reads_dst) 2171bf215546Sopenharmony_ci{ 2172bf215546Sopenharmony_ci *rop_reads_dst = tu_logic_op_reads_dst(op); 2173bf215546Sopenharmony_ci return A6XX_RB_MRT_CONTROL_ROP_ENABLE | 2174bf215546Sopenharmony_ci A6XX_RB_MRT_CONTROL_ROP_CODE(tu6_rop(op)); 2175bf215546Sopenharmony_ci} 2176bf215546Sopenharmony_ci 2177bf215546Sopenharmony_cistatic void 2178bf215546Sopenharmony_citu6_emit_rb_mrt_controls(struct tu_pipeline *pipeline, 2179bf215546Sopenharmony_ci const VkPipelineColorBlendStateCreateInfo *blend_info, 2180bf215546Sopenharmony_ci const VkFormat attachment_formats[MAX_RTS], 2181bf215546Sopenharmony_ci bool *rop_reads_dst, 2182bf215546Sopenharmony_ci uint32_t *color_bandwidth_per_sample) 2183bf215546Sopenharmony_ci{ 2184bf215546Sopenharmony_ci const VkPipelineColorWriteCreateInfoEXT *color_info = 2185bf215546Sopenharmony_ci vk_find_struct_const(blend_info->pNext, 2186bf215546Sopenharmony_ci PIPELINE_COLOR_WRITE_CREATE_INFO_EXT); 2187bf215546Sopenharmony_ci 2188bf215546Sopenharmony_ci /* The static state is ignored if it's dynamic. In that case assume 2189bf215546Sopenharmony_ci * everything is enabled and then the appropriate registers will be zero'd 2190bf215546Sopenharmony_ci * dynamically. 
2191bf215546Sopenharmony_ci */ 2192bf215546Sopenharmony_ci if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE)) 2193bf215546Sopenharmony_ci color_info = NULL; 2194bf215546Sopenharmony_ci 2195bf215546Sopenharmony_ci *rop_reads_dst = false; 2196bf215546Sopenharmony_ci *color_bandwidth_per_sample = 0; 2197bf215546Sopenharmony_ci 2198bf215546Sopenharmony_ci uint32_t rb_mrt_control_rop = 0; 2199bf215546Sopenharmony_ci if (blend_info->logicOpEnable) { 2200bf215546Sopenharmony_ci pipeline->logic_op_enabled = true; 2201bf215546Sopenharmony_ci rb_mrt_control_rop = tu6_rb_mrt_control_rop(blend_info->logicOp, 2202bf215546Sopenharmony_ci rop_reads_dst); 2203bf215546Sopenharmony_ci } 2204bf215546Sopenharmony_ci 2205bf215546Sopenharmony_ci uint32_t total_bpp = 0; 2206bf215546Sopenharmony_ci pipeline->num_rts = blend_info->attachmentCount; 2207bf215546Sopenharmony_ci for (uint32_t i = 0; i < blend_info->attachmentCount; i++) { 2208bf215546Sopenharmony_ci const VkPipelineColorBlendAttachmentState *att = 2209bf215546Sopenharmony_ci &blend_info->pAttachments[i]; 2210bf215546Sopenharmony_ci const VkFormat format = attachment_formats[i]; 2211bf215546Sopenharmony_ci 2212bf215546Sopenharmony_ci uint32_t rb_mrt_control = 0; 2213bf215546Sopenharmony_ci uint32_t rb_mrt_blend_control = 0; 2214bf215546Sopenharmony_ci if (format != VK_FORMAT_UNDEFINED && 2215bf215546Sopenharmony_ci (!color_info || color_info->pColorWriteEnables[i])) { 2216bf215546Sopenharmony_ci const bool has_alpha = vk_format_has_alpha(format); 2217bf215546Sopenharmony_ci 2218bf215546Sopenharmony_ci rb_mrt_control = 2219bf215546Sopenharmony_ci tu6_rb_mrt_control(att, rb_mrt_control_rop, has_alpha); 2220bf215546Sopenharmony_ci rb_mrt_blend_control = tu6_rb_mrt_blend_control(att, has_alpha); 2221bf215546Sopenharmony_ci 2222bf215546Sopenharmony_ci /* calculate bpp based on format and write mask */ 2223bf215546Sopenharmony_ci uint32_t write_bpp = 0; 2224bf215546Sopenharmony_ci if (att->colorWriteMask == 
0xf) { 2225bf215546Sopenharmony_ci write_bpp = vk_format_get_blocksizebits(format); 2226bf215546Sopenharmony_ci } else { 2227bf215546Sopenharmony_ci const enum pipe_format pipe_format = vk_format_to_pipe_format(format); 2228bf215546Sopenharmony_ci for (uint32_t i = 0; i < 4; i++) { 2229bf215546Sopenharmony_ci if (att->colorWriteMask & (1 << i)) { 2230bf215546Sopenharmony_ci write_bpp += util_format_get_component_bits(pipe_format, 2231bf215546Sopenharmony_ci UTIL_FORMAT_COLORSPACE_RGB, i); 2232bf215546Sopenharmony_ci } 2233bf215546Sopenharmony_ci } 2234bf215546Sopenharmony_ci } 2235bf215546Sopenharmony_ci total_bpp += write_bpp; 2236bf215546Sopenharmony_ci 2237bf215546Sopenharmony_ci pipeline->color_write_enable |= BIT(i); 2238bf215546Sopenharmony_ci if (att->blendEnable) 2239bf215546Sopenharmony_ci pipeline->blend_enable |= BIT(i); 2240bf215546Sopenharmony_ci 2241bf215546Sopenharmony_ci if (att->blendEnable || *rop_reads_dst) { 2242bf215546Sopenharmony_ci total_bpp += write_bpp; 2243bf215546Sopenharmony_ci } 2244bf215546Sopenharmony_ci } 2245bf215546Sopenharmony_ci 2246bf215546Sopenharmony_ci pipeline->rb_mrt_control[i] = rb_mrt_control & pipeline->rb_mrt_control_mask; 2247bf215546Sopenharmony_ci pipeline->rb_mrt_blend_control[i] = rb_mrt_blend_control; 2248bf215546Sopenharmony_ci } 2249bf215546Sopenharmony_ci 2250bf215546Sopenharmony_ci *color_bandwidth_per_sample = total_bpp / 8; 2251bf215546Sopenharmony_ci} 2252bf215546Sopenharmony_ci

/* Compute the SP_BLEND_CNTL and RB_BLEND_CNTL register values from the blend
 * enable mask, the dual-source flag and the multisample state, and cache them
 * on the pipeline.  Each value is ANDed with the pipeline's matching *_mask
 * field before it is stored.
 */
static void
tu6_emit_blend_control(struct tu_pipeline *pipeline,
                       uint32_t blend_enable_mask,
                       bool dual_src_blend,
                       const VkPipelineMultisampleStateCreateInfo *msaa_info)
{
   /* Low 16 bits of the first application-provided mask word when a mask is
    * given, otherwise one bit per rasterization sample.
    */
   uint32_t sample_mask;
   if (msaa_info->pSampleMask)
      sample_mask = *msaa_info->pSampleMask & 0xffff;
   else
      sample_mask = (1 << msaa_info->rasterizationSamples) - 1;

   pipeline->sp_blend_cntl =
      A6XX_SP_BLEND_CNTL(.enable_blend = blend_enable_mask,
                         .dual_color_in_enable = dual_src_blend,
                         .alpha_to_coverage = msaa_info->alphaToCoverageEnable,
                         .unk8 = true).value &
      pipeline->sp_blend_cntl_mask;

   /* set A6XX_RB_BLEND_CNTL_INDEPENDENT_BLEND only when enabled? */
   pipeline->rb_blend_cntl =
      A6XX_RB_BLEND_CNTL(.enable_blend = blend_enable_mask,
                         .independent_blend = true,
                         .sample_mask = sample_mask,
                         .dual_color_in_enable = dual_src_blend,
                         .alpha_to_coverage = msaa_info->alphaToCoverageEnable,
                         .alpha_to_one = msaa_info->alphaToOneEnable).value &
      pipeline->rb_blend_cntl_mask;
}

/* Emit the blend registers cached on the pipeline: SP_BLEND_CNTL,
 * RB_BLEND_CNTL, and one RB_MRT_CONTROL / RB_MRT_BLEND_CONTROL pair for each
 * of the pipeline->num_rts render targets.
 */
static void
tu6_emit_blend(struct tu_cs *cs,
               struct tu_pipeline *pipeline)
{
   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL(.dword = pipeline->sp_blend_cntl));
   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.dword = pipeline->rb_blend_cntl));

   for (unsigned rt = 0; rt < pipeline->num_rts; rt++) {
      tu_cs_emit_regs(cs,
                      A6XX_RB_MRT_CONTROL(rt, .dword = pipeline->rb_mrt_control[rt]),
                      A6XX_RB_MRT_BLEND_CONTROL(rt, .dword = pipeline->rb_mrt_blend_control[rt]));
   }
}

/* Size the private-memory allocation for a shader needing pvtmem_bytes.
 *
 * The per-fiber size is pvtmem_bytes aligned to 512, the per-SP size is the
 * per-fiber size times fibers_per_sp aligned to 4096.  Both are written to
 * *config when config is non-NULL.  Returns the per-SP size multiplied by the
 * number of SP cores.
 */
static uint32_t
calc_pvtmem_size(struct tu_device *dev, struct tu_pvtmem_config *config,
                 uint32_t pvtmem_bytes)
{
   const uint32_t fiber_bytes = ALIGN(pvtmem_bytes, 512);
   const uint32_t sp_bytes =
      ALIGN(fiber_bytes * dev->physical_device->info->a6xx.fibers_per_sp, 1 << 12);

   if (config) {
      config->per_fiber_size = fiber_bytes;
      config->per_sp_size = sp_bytes;
   }

   return dev->physical_device->info->num_sp_cores * sp_bytes;
}

/* Fill *config for a pipeline that uses pvtmem_bytes of private memory and
 * allocate the backing BO in pipeline->pvtmem_bo.
 *
 * With pvtmem_bytes == 0 the config is zeroed and no BO is allocated.
 * Returns the result of the BO allocation (VK_SUCCESS when none is needed).
 */
static VkResult
tu_setup_pvtmem(struct tu_device *dev,
                struct tu_pipeline *pipeline,
                struct tu_pvtmem_config *config,
                uint32_t pvtmem_bytes, bool per_wave)
{
   if (pvtmem_bytes == 0) {
      memset(config, 0, sizeof(*config));
      return VK_SUCCESS;
   }

   const uint32_t bo_size = calc_pvtmem_size(dev, config, pvtmem_bytes);
   config->per_wave = per_wave;

   VkResult result = tu_bo_init_new(dev, &pipeline->pvtmem_bo, bo_size,
                                    TU_BO_ALLOC_NO_FLAGS);
   if (result == VK_SUCCESS)
      config->iova = pipeline->pvtmem_bo->iova;

   return result;
}


/* Reserve the pipeline's command-stream storage.
 *
 * The size (in dwords) is a fixed 1024 plus the descriptor load-state size
 * plus the shader binaries and their additional per-variant reservations.
 * A non-NULL builder selects the graphics path; otherwise `compute` is the
 * single variant being sized.  On the graphics path
 * builder->additional_cs_reserve_size is recomputed as a side effect.
 */
static VkResult
tu_pipeline_allocate_cs(struct tu_device *dev,
                        struct tu_pipeline *pipeline,
                        struct tu_pipeline_layout *layout,
                        struct tu_pipeline_builder *builder,
                        struct ir3_shader_variant *compute)
{
   uint32_t size = 1024 + tu6_load_state_size(pipeline, layout, compute);

   if (!builder) {
      /* compute case: */
      size += compute->info.size / 4;
      size += tu_xs_get_additional_cs_size_dwords(compute);
   } else {
      /* graphics case: */
      size += 2 * TU6_EMIT_VERTEX_INPUT_MAX_DWORDS;

      builder->additional_cs_reserve_size = 0;
      for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) {
         struct ir3_shader_variant *variant = builder->shaders->variants[i];
         if (!variant)
            continue;

         size += variant->info.size / 4;

         builder->additional_cs_reserve_size +=
            tu_xs_get_additional_cs_size_dwords(variant);
         if (variant->binning) {
            builder->additional_cs_reserve_size +=
               tu_xs_get_additional_cs_size_dwords(variant->binning);
         }
      }

      size += builder->binning_variant->info.size / 4;

      /* The additional size is used twice, once per tu6_emit_program() call. */
      size += builder->additional_cs_reserve_size * 2;
   }

   /* The space comes from the device's RO suballocator.
    *
    * Sub-allocating BOs saves memory and also kernel overhead in refcounting
    * of BOs at exec time.
    *
    * The pipeline cache would seem like a natural home for the suballocator,
    * but it is not guaranteed to outlive the pipelines created from it, so no
    * long-lived state can be stored there, and its EXTERNALLY_SYNCHRONIZED
    * flag cannot be used to avoid atomics because pipeline destroy isn't
    * synchronized by the cache.
    */
   pthread_mutex_lock(&dev->pipeline_mutex);
   VkResult result = tu_suballoc_bo_alloc(&pipeline->bo, &dev->pipeline_suballoc,
                                          size * 4, 128);
   pthread_mutex_unlock(&dev->pipeline_mutex);
   if (result != VK_SUCCESS)
      return result;

   tu_cs_init_suballoc(&pipeline->cs, dev, &pipeline->bo);

   return VK_SUCCESS;
}

/* Initialise the pipeline-wide parts of the ir3 shader key (has_gs, msaa,
 * sample_shading, tessellation) from the graphics pipeline create info.
 *
 * When rasterizer discard is statically enabled only has_gs is set and the
 * multisample state is not looked at.
 */
static void
tu_pipeline_shader_key_init(struct ir3_shader_key *key,
                            const struct tu_pipeline *pipeline,
                            const VkGraphicsPipelineCreateInfo *pipeline_info)
{
   for (uint32_t s = 0; s < pipeline_info->stageCount; s++) {
      if (pipeline_info->pStages[s].stage == VK_SHADER_STAGE_GEOMETRY_BIT) {
         key->has_gs = true;
         break;
      }
   }

   const bool discard_is_dynamic =
      pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD);
   if (pipeline_info->pRasterizationState->rasterizerDiscardEnable &&
       !discard_is_dynamic)
      return;

   const VkPipelineMultisampleStateCreateInfo *msaa_info = pipeline_info->pMultisampleState;
   const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations =
      vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT);

   /* The msaa key is also set when sample locations are not the default,
    * since this affects varying interpolation.
    */
   const bool custom_sample_locations =
      sample_locations && sample_locations->sampleLocationsEnable;
   if (msaa_info->rasterizationSamples > 1 || custom_sample_locations)
      key->msaa = true;

   /* The 1.3.215 spec says:
    *
    *    Sample shading can be used to specify a minimum number of unique
    *    samples to process for each fragment. If sample shading is enabled,
    *    an implementation must provide a minimum of
    *
    *       max(ceil(minSampleShadingFactor * totalSamples), 1)
    *
    *    unique associated data for each fragment, where
    *    minSampleShadingFactor is the minimum fraction of sample shading.
    *
    * The definition is pretty much the same as OpenGL's GL_SAMPLE_SHADING.
    * They both require unique associated data.
    *
    * There are discussions to change the definition, such that
    * sampleShadingEnable does not imply unique associated data.  Before the
    * discussions are settled and before apps (i.e., ANGLE) are fixed to
    * follow the new and incompatible definition, we should stick to the
    * current definition.
    *
    * Note that ir3_shader_key::sample_shading is not actually used by ir3,
    * just checked in tu6_emit_fs_inputs.  We will also copy the value to
    * tu_shader_key::force_sample_interp in a bit.
    */
   if (msaa_info->sampleShadingEnable &&
       (msaa_info->minSampleShading * msaa_info->rasterizationSamples) > 1.0f)
      key->sample_shading = true;

   /* We set this after we compile to NIR because we need the prim mode */
   key->tessellation = IR3_TESS_NONE;
}

/* Translate the NIR tessellation primitive mode of `shader` into the
 * corresponding IR3_TESS_* value.
 */
static uint32_t
tu6_get_tessmode(struct tu_shader* shader)
{
   switch (shader->ir3_shader->nir->info.tess._primitive_mode) {
   case TESS_PRIMITIVE_UNSPECIFIED:
      return IR3_TESS_NONE;
   case TESS_PRIMITIVE_TRIANGLES:
      return IR3_TESS_TRIANGLES;
   case TESS_PRIMITIVE_QUADS:
      return IR3_TESS_QUADS;
   case TESS_PRIMITIVE_ISOLINES:
      return IR3_TESS_ISOLINES;
   default:
      unreachable("bad tessmode");
   }
}

/* Copy a variant's binary into the pipeline command stream and return the
 * address it was placed at.  Returns 0 for a NULL variant.
 */
static uint64_t
tu_upload_variant(struct tu_pipeline *pipeline,
                  const struct ir3_shader_variant *variant)
{
   if (!variant)
      return 0;

   /* this expects to get enough alignment because shaders are allocated first
    * and total size is always aligned correctly
    * note: an assert in tu6_emit_xs_config validates the alignment
    */
   struct tu_cs_memory memory;
   tu_cs_alloc(&pipeline->cs, variant->info.size / 4, 1, &memory);

   memcpy(memory.map, variant->bin, variant->info.size);
   return memory.iova;
}

/* Append a tu_pipeline_executable describing `variant` to
 * pipeline->executables.  The final NIR and disassembly strings are
 * duplicated into pipeline->executables_mem_ctx; nir_from_spirv is stored
 * as passed in.
 */
static void
tu_append_executable(struct tu_pipeline *pipeline, struct ir3_shader_variant *variant,
                     char *nir_from_spirv)
{
   char *nir_final =
      ralloc_strdup(pipeline->executables_mem_ctx, variant->disasm_info.nir);
   char *disasm =
      ralloc_strdup(pipeline->executables_mem_ctx, variant->disasm_info.disasm);

   struct tu_pipeline_executable exe = {
      .stage = variant->type,
      .nir_from_spirv = nir_from_spirv,
      .nir_final = nir_final,
      .disasm = disasm,
      .stats = variant->info,
      .is_binning = variant->binning_pass,
   };

   util_dynarray_append(&pipeline->executables, struct tu_pipeline_executable, exe);
}

/* Run the NIR cross-stage varying optimisations over every adjacent pair of
 * present stages, walking from the last stage towards the vertex shader so
 * that each stage is first treated as a producer and then as the consumer of
 * the previous present stage.
 */
static void
tu_link_shaders(struct tu_pipeline_builder *builder,
                nir_shader **shaders, unsigned shaders_count)
{
   nir_shader *consumer = NULL;

   for (gl_shader_stage stage = shaders_count - 1;
        stage >= MESA_SHADER_VERTEX; stage--) {
      nir_shader *producer = shaders[stage];
      if (!producer)
         continue;

      /* The last present stage has nothing after it to link against. */
      if (!consumer) {
         consumer = producer;
         continue;
      }

      if (nir_link_opt_varyings(producer, consumer)) {
         NIR_PASS_V(consumer, nir_opt_constant_folding);
         NIR_PASS_V(consumer, nir_opt_algebraic);
         NIR_PASS_V(consumer, nir_opt_dce);
      }

      NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);
      NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);

      const bool removed_varyings = nir_remove_unused_varyings(producer, consumer);

      nir_compact_varyings(producer, consumer, true);
      if (removed_varyings) {
         if (nir_lower_global_vars_to_local(producer)) {
            /* Remove dead writes, which can remove input loads */
            NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_temp, NULL);
            NIR_PASS_V(producer, nir_opt_dce);
         }
         nir_lower_global_vars_to_local(consumer);
      }

      consumer = producer;
   }
}

/* Set key->api_wavesize and key->real_wavesize for one shader stage.
 *
 * A missing stage, or a stage that allows a varying subgroup size, gets
 * IR3_SINGLE_OR_DOUBLE for both.  Otherwise the API wave size follows the
 * required subgroup size (double when none is requested), and the real wave
 * size equals the API one when full subgroups are required or the API size
 * is single-only, and is IR3_SINGLE_OR_DOUBLE in every other case.
 */
static void
tu_shader_key_init(struct tu_shader_key *key,
                   const VkPipelineShaderStageCreateInfo *stage_info,
                   struct tu_device *dev)
{
   enum ir3_wavesize_option api_wavesize, real_wavesize;

   if (!stage_info ||
       (stage_info->flags &
        VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT)) {
      api_wavesize = real_wavesize = IR3_SINGLE_OR_DOUBLE;
   } else {
      const VkPipelineShaderStageRequiredSubgroupSizeCreateInfo *size_info =
         vk_find_struct_const(stage_info->pNext,
                              PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO);

      if (!size_info) {
         /* Match the exposed subgroupSize. */
         api_wavesize = IR3_DOUBLE_ONLY;
      } else if (size_info->requiredSubgroupSize == dev->compiler->threadsize_base) {
         api_wavesize = IR3_SINGLE_ONLY;
      } else {
         assert(size_info->requiredSubgroupSize == dev->compiler->threadsize_base * 2);
         api_wavesize = IR3_DOUBLE_ONLY;
      }

      const bool require_full_subgroups =
         stage_info->flags &
         VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT;
      if (require_full_subgroups || api_wavesize == IR3_SINGLE_ONLY)
         real_wavesize = api_wavesize;
      else
         real_wavesize = IR3_SINGLE_OR_DOUBLE;
   }

   key->api_wavesize = api_wavesize;
   key->real_wavesize = real_wavesize;
}

/* Feed one shader stage into a running SHA-1: first the digest produced by
 * vk_pipeline_hash_shader_stage() for the stage, then the raw bytes of the
 * stage's tu_shader_key.
 */
static void
tu_hash_stage(struct mesa_sha1 *ctx,
              const VkPipelineShaderStageCreateInfo *stage,
              const struct tu_shader_key *key)
{
   unsigned char digest[SHA1_DIGEST_LENGTH];

   vk_pipeline_hash_shader_stage(stage, digest);
   _mesa_sha1_update(ctx, digest, sizeof(digest));
   _mesa_sha1_update(ctx, key, sizeof(*key));
}

2611bf215546Sopenharmony_ci/* Hash flags which can affect ir3 shader compilation which aren't known until 2612bf215546Sopenharmony_ci * logical device creation. 
2613bf215546Sopenharmony_ci */ 2614bf215546Sopenharmony_cistatic void 2615bf215546Sopenharmony_citu_hash_compiler(struct mesa_sha1 *ctx, const struct ir3_compiler *compiler) 2616bf215546Sopenharmony_ci{ 2617bf215546Sopenharmony_ci _mesa_sha1_update(ctx, &compiler->robust_buffer_access2, 2618bf215546Sopenharmony_ci sizeof(compiler->robust_buffer_access2)); 2619bf215546Sopenharmony_ci _mesa_sha1_update(ctx, &ir3_shader_debug, sizeof(ir3_shader_debug)); 2620bf215546Sopenharmony_ci} 2621bf215546Sopenharmony_ci 2622bf215546Sopenharmony_cistatic void 2623bf215546Sopenharmony_citu_hash_shaders(unsigned char *hash, 2624bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo **stages, 2625bf215546Sopenharmony_ci const struct tu_pipeline_layout *layout, 2626bf215546Sopenharmony_ci const struct tu_shader_key *keys, 2627bf215546Sopenharmony_ci const struct ir3_shader_key *ir3_key, 2628bf215546Sopenharmony_ci const struct ir3_compiler *compiler) 2629bf215546Sopenharmony_ci{ 2630bf215546Sopenharmony_ci struct mesa_sha1 ctx; 2631bf215546Sopenharmony_ci 2632bf215546Sopenharmony_ci _mesa_sha1_init(&ctx); 2633bf215546Sopenharmony_ci 2634bf215546Sopenharmony_ci if (layout) 2635bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 2636bf215546Sopenharmony_ci 2637bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, ir3_key, sizeof(ir3_key)); 2638bf215546Sopenharmony_ci 2639bf215546Sopenharmony_ci for (int i = 0; i < MESA_SHADER_STAGES; ++i) { 2640bf215546Sopenharmony_ci if (stages[i]) { 2641bf215546Sopenharmony_ci tu_hash_stage(&ctx, stages[i], &keys[i]); 2642bf215546Sopenharmony_ci } 2643bf215546Sopenharmony_ci } 2644bf215546Sopenharmony_ci tu_hash_compiler(&ctx, compiler); 2645bf215546Sopenharmony_ci _mesa_sha1_final(&ctx, hash); 2646bf215546Sopenharmony_ci} 2647bf215546Sopenharmony_ci 2648bf215546Sopenharmony_cistatic void 2649bf215546Sopenharmony_citu_hash_compute(unsigned char *hash, 2650bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo 
*stage, 2651bf215546Sopenharmony_ci const struct tu_pipeline_layout *layout, 2652bf215546Sopenharmony_ci const struct tu_shader_key *key, 2653bf215546Sopenharmony_ci const struct ir3_compiler *compiler) 2654bf215546Sopenharmony_ci{ 2655bf215546Sopenharmony_ci struct mesa_sha1 ctx; 2656bf215546Sopenharmony_ci 2657bf215546Sopenharmony_ci _mesa_sha1_init(&ctx); 2658bf215546Sopenharmony_ci 2659bf215546Sopenharmony_ci if (layout) 2660bf215546Sopenharmony_ci _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); 2661bf215546Sopenharmony_ci 2662bf215546Sopenharmony_ci tu_hash_stage(&ctx, stage, key); 2663bf215546Sopenharmony_ci 2664bf215546Sopenharmony_ci tu_hash_compiler(&ctx, compiler); 2665bf215546Sopenharmony_ci _mesa_sha1_final(&ctx, hash); 2666bf215546Sopenharmony_ci} 2667bf215546Sopenharmony_ci 2668bf215546Sopenharmony_cistatic bool 2669bf215546Sopenharmony_citu_shaders_serialize(struct vk_pipeline_cache_object *object, 2670bf215546Sopenharmony_ci struct blob *blob); 2671bf215546Sopenharmony_ci 2672bf215546Sopenharmony_cistatic struct vk_pipeline_cache_object * 2673bf215546Sopenharmony_citu_shaders_deserialize(struct vk_device *device, 2674bf215546Sopenharmony_ci const void *key_data, size_t key_size, 2675bf215546Sopenharmony_ci struct blob_reader *blob); 2676bf215546Sopenharmony_ci 2677bf215546Sopenharmony_cistatic void 2678bf215546Sopenharmony_citu_shaders_destroy(struct vk_pipeline_cache_object *object) 2679bf215546Sopenharmony_ci{ 2680bf215546Sopenharmony_ci struct tu_compiled_shaders *shaders = 2681bf215546Sopenharmony_ci container_of(object, struct tu_compiled_shaders, base); 2682bf215546Sopenharmony_ci 2683bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) 2684bf215546Sopenharmony_ci ralloc_free(shaders->variants[i]); 2685bf215546Sopenharmony_ci 2686bf215546Sopenharmony_ci vk_pipeline_cache_object_finish(&shaders->base); 2687bf215546Sopenharmony_ci vk_free(&object->device->alloc, shaders); 2688bf215546Sopenharmony_ci} 
2689bf215546Sopenharmony_ci 2690bf215546Sopenharmony_ciconst struct vk_pipeline_cache_object_ops tu_shaders_ops = { 2691bf215546Sopenharmony_ci .serialize = tu_shaders_serialize, 2692bf215546Sopenharmony_ci .deserialize = tu_shaders_deserialize, 2693bf215546Sopenharmony_ci .destroy = tu_shaders_destroy, 2694bf215546Sopenharmony_ci}; 2695bf215546Sopenharmony_ci 2696bf215546Sopenharmony_cistatic struct tu_compiled_shaders * 2697bf215546Sopenharmony_citu_shaders_init(struct tu_device *dev, const void *key_data, size_t key_size) 2698bf215546Sopenharmony_ci{ 2699bf215546Sopenharmony_ci VK_MULTIALLOC(ma); 2700bf215546Sopenharmony_ci VK_MULTIALLOC_DECL(&ma, struct tu_compiled_shaders, shaders, 1); 2701bf215546Sopenharmony_ci VK_MULTIALLOC_DECL_SIZE(&ma, void, obj_key_data, key_size); 2702bf215546Sopenharmony_ci 2703bf215546Sopenharmony_ci if (!vk_multialloc_zalloc(&ma, &dev->vk.alloc, 2704bf215546Sopenharmony_ci VK_SYSTEM_ALLOCATION_SCOPE_DEVICE)) 2705bf215546Sopenharmony_ci return NULL; 2706bf215546Sopenharmony_ci 2707bf215546Sopenharmony_ci memcpy(obj_key_data, key_data, key_size); 2708bf215546Sopenharmony_ci vk_pipeline_cache_object_init(&dev->vk, &shaders->base, 2709bf215546Sopenharmony_ci &tu_shaders_ops, obj_key_data, key_size); 2710bf215546Sopenharmony_ci 2711bf215546Sopenharmony_ci return shaders; 2712bf215546Sopenharmony_ci} 2713bf215546Sopenharmony_ci 2714bf215546Sopenharmony_cistatic bool 2715bf215546Sopenharmony_citu_shaders_serialize(struct vk_pipeline_cache_object *object, 2716bf215546Sopenharmony_ci struct blob *blob) 2717bf215546Sopenharmony_ci{ 2718bf215546Sopenharmony_ci struct tu_compiled_shaders *shaders = 2719bf215546Sopenharmony_ci container_of(object, struct tu_compiled_shaders, base); 2720bf215546Sopenharmony_ci 2721bf215546Sopenharmony_ci blob_write_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts)); 2722bf215546Sopenharmony_ci blob_write_uint8(blob, shaders->active_desc_sets); 2723bf215546Sopenharmony_ci blob_write_uint8(blob, 
shaders->multi_pos_output); 2724bf215546Sopenharmony_ci 2725bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) { 2726bf215546Sopenharmony_ci if (shaders->variants[i]) { 2727bf215546Sopenharmony_ci blob_write_uint8(blob, 1); 2728bf215546Sopenharmony_ci ir3_store_variant(blob, shaders->variants[i]); 2729bf215546Sopenharmony_ci } else { 2730bf215546Sopenharmony_ci blob_write_uint8(blob, 0); 2731bf215546Sopenharmony_ci } 2732bf215546Sopenharmony_ci } 2733bf215546Sopenharmony_ci 2734bf215546Sopenharmony_ci return true; 2735bf215546Sopenharmony_ci} 2736bf215546Sopenharmony_ci 2737bf215546Sopenharmony_cistatic struct vk_pipeline_cache_object * 2738bf215546Sopenharmony_citu_shaders_deserialize(struct vk_device *_device, 2739bf215546Sopenharmony_ci const void *key_data, size_t key_size, 2740bf215546Sopenharmony_ci struct blob_reader *blob) 2741bf215546Sopenharmony_ci{ 2742bf215546Sopenharmony_ci struct tu_device *dev = container_of(_device, struct tu_device, vk); 2743bf215546Sopenharmony_ci struct tu_compiled_shaders *shaders = 2744bf215546Sopenharmony_ci tu_shaders_init(dev, key_data, key_size); 2745bf215546Sopenharmony_ci 2746bf215546Sopenharmony_ci if (!shaders) 2747bf215546Sopenharmony_ci return NULL; 2748bf215546Sopenharmony_ci 2749bf215546Sopenharmony_ci blob_copy_bytes(blob, shaders->push_consts, sizeof(shaders->push_consts)); 2750bf215546Sopenharmony_ci shaders->active_desc_sets = blob_read_uint8(blob); 2751bf215546Sopenharmony_ci shaders->multi_pos_output = blob_read_uint8(blob); 2752bf215546Sopenharmony_ci 2753bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(shaders->variants); i++) { 2754bf215546Sopenharmony_ci bool has_shader = blob_read_uint8(blob); 2755bf215546Sopenharmony_ci if (has_shader) { 2756bf215546Sopenharmony_ci shaders->variants[i] = ir3_retrieve_variant(blob, dev->compiler, NULL); 2757bf215546Sopenharmony_ci } 2758bf215546Sopenharmony_ci } 2759bf215546Sopenharmony_ci 2760bf215546Sopenharmony_ci return 
&shaders->base; 2761bf215546Sopenharmony_ci} 2762bf215546Sopenharmony_ci 2763bf215546Sopenharmony_cistatic struct tu_compiled_shaders * 2764bf215546Sopenharmony_citu_pipeline_cache_lookup(struct vk_pipeline_cache *cache, 2765bf215546Sopenharmony_ci const void *key_data, size_t key_size, 2766bf215546Sopenharmony_ci bool *application_cache_hit) 2767bf215546Sopenharmony_ci{ 2768bf215546Sopenharmony_ci struct vk_pipeline_cache_object *object = 2769bf215546Sopenharmony_ci vk_pipeline_cache_lookup_object(cache, key_data, key_size, 2770bf215546Sopenharmony_ci &tu_shaders_ops, application_cache_hit); 2771bf215546Sopenharmony_ci if (object) 2772bf215546Sopenharmony_ci return container_of(object, struct tu_compiled_shaders, base); 2773bf215546Sopenharmony_ci else 2774bf215546Sopenharmony_ci return NULL; 2775bf215546Sopenharmony_ci} 2776bf215546Sopenharmony_ci 2777bf215546Sopenharmony_cistatic struct tu_compiled_shaders * 2778bf215546Sopenharmony_citu_pipeline_cache_insert(struct vk_pipeline_cache *cache, 2779bf215546Sopenharmony_ci struct tu_compiled_shaders *shaders) 2780bf215546Sopenharmony_ci{ 2781bf215546Sopenharmony_ci struct vk_pipeline_cache_object *object = 2782bf215546Sopenharmony_ci vk_pipeline_cache_add_object(cache, &shaders->base); 2783bf215546Sopenharmony_ci return container_of(object, struct tu_compiled_shaders, base); 2784bf215546Sopenharmony_ci} 2785bf215546Sopenharmony_ci 2786bf215546Sopenharmony_cistatic VkResult 2787bf215546Sopenharmony_citu_pipeline_builder_compile_shaders(struct tu_pipeline_builder *builder, 2788bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 2789bf215546Sopenharmony_ci{ 2790bf215546Sopenharmony_ci VkResult result = VK_SUCCESS; 2791bf215546Sopenharmony_ci const struct ir3_compiler *compiler = builder->device->compiler; 2792bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = { 2793bf215546Sopenharmony_ci NULL 2794bf215546Sopenharmony_ci }; 2795bf215546Sopenharmony_ci 
VkPipelineCreationFeedback pipeline_feedback = { 2796bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 2797bf215546Sopenharmony_ci }; 2798bf215546Sopenharmony_ci VkPipelineCreationFeedback stage_feedbacks[MESA_SHADER_STAGES] = { 0 }; 2799bf215546Sopenharmony_ci 2800bf215546Sopenharmony_ci int64_t pipeline_start = os_time_get_nano(); 2801bf215546Sopenharmony_ci 2802bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *creation_feedback = 2803bf215546Sopenharmony_ci vk_find_struct_const(builder->create_info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); 2804bf215546Sopenharmony_ci 2805bf215546Sopenharmony_ci for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { 2806bf215546Sopenharmony_ci gl_shader_stage stage = 2807bf215546Sopenharmony_ci vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage); 2808bf215546Sopenharmony_ci stage_infos[stage] = &builder->create_info->pStages[i]; 2809bf215546Sopenharmony_ci } 2810bf215546Sopenharmony_ci 2811bf215546Sopenharmony_ci if (tu6_shared_constants_enable(builder->layout, builder->device->compiler)) { 2812bf215546Sopenharmony_ci pipeline->shared_consts = (struct tu_push_constant_range) { 2813bf215546Sopenharmony_ci .lo = 0, 2814bf215546Sopenharmony_ci .dwords = builder->layout->push_constant_size / 4, 2815bf215546Sopenharmony_ci }; 2816bf215546Sopenharmony_ci } 2817bf215546Sopenharmony_ci 2818bf215546Sopenharmony_ci struct tu_shader_key keys[ARRAY_SIZE(stage_infos)] = { }; 2819bf215546Sopenharmony_ci for (gl_shader_stage stage = MESA_SHADER_VERTEX; 2820bf215546Sopenharmony_ci stage < ARRAY_SIZE(keys); stage++) { 2821bf215546Sopenharmony_ci tu_shader_key_init(&keys[stage], stage_infos[stage], builder->device); 2822bf215546Sopenharmony_ci } 2823bf215546Sopenharmony_ci 2824bf215546Sopenharmony_ci struct ir3_shader_key ir3_key = {}; 2825bf215546Sopenharmony_ci tu_pipeline_shader_key_init(&ir3_key, pipeline, builder->create_info); 2826bf215546Sopenharmony_ci 
2827bf215546Sopenharmony_ci keys[MESA_SHADER_VERTEX].multiview_mask = builder->multiview_mask; 2828bf215546Sopenharmony_ci keys[MESA_SHADER_FRAGMENT].multiview_mask = builder->multiview_mask; 2829bf215546Sopenharmony_ci keys[MESA_SHADER_FRAGMENT].force_sample_interp = ir3_key.sample_shading; 2830bf215546Sopenharmony_ci 2831bf215546Sopenharmony_ci unsigned char pipeline_sha1[20]; 2832bf215546Sopenharmony_ci tu_hash_shaders(pipeline_sha1, stage_infos, builder->layout, keys, &ir3_key, compiler); 2833bf215546Sopenharmony_ci 2834bf215546Sopenharmony_ci const bool executable_info = builder->create_info->flags & 2835bf215546Sopenharmony_ci VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; 2836bf215546Sopenharmony_ci 2837bf215546Sopenharmony_ci char *nir_initial_disasm[ARRAY_SIZE(stage_infos)] = { NULL }; 2838bf215546Sopenharmony_ci 2839bf215546Sopenharmony_ci struct tu_compiled_shaders *compiled_shaders; 2840bf215546Sopenharmony_ci 2841bf215546Sopenharmony_ci if (!executable_info) { 2842bf215546Sopenharmony_ci bool application_cache_hit = false; 2843bf215546Sopenharmony_ci 2844bf215546Sopenharmony_ci compiled_shaders = 2845bf215546Sopenharmony_ci tu_pipeline_cache_lookup(builder->cache, &pipeline_sha1, 2846bf215546Sopenharmony_ci sizeof(pipeline_sha1), 2847bf215546Sopenharmony_ci &application_cache_hit); 2848bf215546Sopenharmony_ci 2849bf215546Sopenharmony_ci if (application_cache_hit && builder->cache != builder->device->mem_cache) { 2850bf215546Sopenharmony_ci pipeline_feedback.flags |= 2851bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 2852bf215546Sopenharmony_ci } 2853bf215546Sopenharmony_ci 2854bf215546Sopenharmony_ci if (compiled_shaders) 2855bf215546Sopenharmony_ci goto done; 2856bf215546Sopenharmony_ci } 2857bf215546Sopenharmony_ci 2858bf215546Sopenharmony_ci if (builder->create_info->flags & 2859bf215546Sopenharmony_ci VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) { 2860bf215546Sopenharmony_ci 
return VK_PIPELINE_COMPILE_REQUIRED; 2861bf215546Sopenharmony_ci } 2862bf215546Sopenharmony_ci 2863bf215546Sopenharmony_ci nir_shader *nir[ARRAY_SIZE(stage_infos)] = { NULL }; 2864bf215546Sopenharmony_ci 2865bf215546Sopenharmony_ci struct tu_shader *shaders[ARRAY_SIZE(nir)] = { NULL }; 2866bf215546Sopenharmony_ci 2867bf215546Sopenharmony_ci for (gl_shader_stage stage = MESA_SHADER_VERTEX; 2868bf215546Sopenharmony_ci stage < ARRAY_SIZE(nir); stage++) { 2869bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage]; 2870bf215546Sopenharmony_ci if (!stage_info) 2871bf215546Sopenharmony_ci continue; 2872bf215546Sopenharmony_ci 2873bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 2874bf215546Sopenharmony_ci 2875bf215546Sopenharmony_ci nir[stage] = tu_spirv_to_nir(builder->device, builder->mem_ctx, stage_info, stage); 2876bf215546Sopenharmony_ci if (!nir[stage]) { 2877bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 2878bf215546Sopenharmony_ci goto fail; 2879bf215546Sopenharmony_ci } 2880bf215546Sopenharmony_ci 2881bf215546Sopenharmony_ci stage_feedbacks[stage].flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT; 2882bf215546Sopenharmony_ci stage_feedbacks[stage].duration += os_time_get_nano() - stage_start; 2883bf215546Sopenharmony_ci } 2884bf215546Sopenharmony_ci 2885bf215546Sopenharmony_ci if (!nir[MESA_SHADER_FRAGMENT]) { 2886bf215546Sopenharmony_ci const nir_shader_compiler_options *nir_options = 2887bf215546Sopenharmony_ci ir3_get_compiler_options(builder->device->compiler); 2888bf215546Sopenharmony_ci nir_builder fs_b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 2889bf215546Sopenharmony_ci nir_options, 2890bf215546Sopenharmony_ci "noop_fs"); 2891bf215546Sopenharmony_ci nir[MESA_SHADER_FRAGMENT] = fs_b.shader; 2892bf215546Sopenharmony_ci } 2893bf215546Sopenharmony_ci 2894bf215546Sopenharmony_ci if (executable_info) { 2895bf215546Sopenharmony_ci for (gl_shader_stage stage = MESA_SHADER_VERTEX; 
2896bf215546Sopenharmony_ci stage < ARRAY_SIZE(nir); stage++) { 2897bf215546Sopenharmony_ci if (!nir[stage]) 2898bf215546Sopenharmony_ci continue; 2899bf215546Sopenharmony_ci 2900bf215546Sopenharmony_ci nir_initial_disasm[stage] = 2901bf215546Sopenharmony_ci nir_shader_as_str(nir[stage], pipeline->executables_mem_ctx); 2902bf215546Sopenharmony_ci } 2903bf215546Sopenharmony_ci } 2904bf215546Sopenharmony_ci 2905bf215546Sopenharmony_ci tu_link_shaders(builder, nir, ARRAY_SIZE(nir)); 2906bf215546Sopenharmony_ci 2907bf215546Sopenharmony_ci uint32_t desc_sets = 0; 2908bf215546Sopenharmony_ci for (gl_shader_stage stage = MESA_SHADER_VERTEX; 2909bf215546Sopenharmony_ci stage < ARRAY_SIZE(nir); stage++) { 2910bf215546Sopenharmony_ci if (!nir[stage]) 2911bf215546Sopenharmony_ci continue; 2912bf215546Sopenharmony_ci 2913bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 2914bf215546Sopenharmony_ci 2915bf215546Sopenharmony_ci struct tu_shader *shader = 2916bf215546Sopenharmony_ci tu_shader_create(builder->device, nir[stage], &keys[stage], 2917bf215546Sopenharmony_ci builder->layout, builder->alloc); 2918bf215546Sopenharmony_ci if (!shader) { 2919bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 2920bf215546Sopenharmony_ci goto fail; 2921bf215546Sopenharmony_ci } 2922bf215546Sopenharmony_ci 2923bf215546Sopenharmony_ci /* In SPIR-V generated from GLSL, the primitive mode is specified in the 2924bf215546Sopenharmony_ci * tessellation evaluation shader, but in SPIR-V generated from HLSL, 2925bf215546Sopenharmony_ci * the mode is specified in the tessellation control shader. 
*/ 2926bf215546Sopenharmony_ci if ((stage == MESA_SHADER_TESS_EVAL || stage == MESA_SHADER_TESS_CTRL) && 2927bf215546Sopenharmony_ci ir3_key.tessellation == IR3_TESS_NONE) { 2928bf215546Sopenharmony_ci ir3_key.tessellation = tu6_get_tessmode(shader); 2929bf215546Sopenharmony_ci } 2930bf215546Sopenharmony_ci 2931bf215546Sopenharmony_ci if (stage > MESA_SHADER_TESS_CTRL) { 2932bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT) { 2933bf215546Sopenharmony_ci ir3_key.tcs_store_primid = ir3_key.tcs_store_primid || 2934bf215546Sopenharmony_ci (nir[stage]->info.inputs_read & (1ull << VARYING_SLOT_PRIMITIVE_ID)); 2935bf215546Sopenharmony_ci } else { 2936bf215546Sopenharmony_ci ir3_key.tcs_store_primid = ir3_key.tcs_store_primid || 2937bf215546Sopenharmony_ci BITSET_TEST(nir[stage]->info.system_values_read, SYSTEM_VALUE_PRIMITIVE_ID); 2938bf215546Sopenharmony_ci } 2939bf215546Sopenharmony_ci } 2940bf215546Sopenharmony_ci 2941bf215546Sopenharmony_ci /* Keep track of the status of each shader's active descriptor sets, 2942bf215546Sopenharmony_ci * which is set in tu_lower_io. 
*/ 2943bf215546Sopenharmony_ci desc_sets |= shader->active_desc_sets; 2944bf215546Sopenharmony_ci 2945bf215546Sopenharmony_ci shaders[stage] = shader; 2946bf215546Sopenharmony_ci 2947bf215546Sopenharmony_ci stage_feedbacks[stage].duration += os_time_get_nano() - stage_start; 2948bf215546Sopenharmony_ci } 2949bf215546Sopenharmony_ci 2950bf215546Sopenharmony_ci struct tu_shader *last_shader = shaders[MESA_SHADER_GEOMETRY]; 2951bf215546Sopenharmony_ci if (!last_shader) 2952bf215546Sopenharmony_ci last_shader = shaders[MESA_SHADER_TESS_EVAL]; 2953bf215546Sopenharmony_ci if (!last_shader) 2954bf215546Sopenharmony_ci last_shader = shaders[MESA_SHADER_VERTEX]; 2955bf215546Sopenharmony_ci 2956bf215546Sopenharmony_ci uint64_t outputs_written = last_shader->ir3_shader->nir->info.outputs_written; 2957bf215546Sopenharmony_ci 2958bf215546Sopenharmony_ci ir3_key.layer_zero = !(outputs_written & VARYING_BIT_LAYER); 2959bf215546Sopenharmony_ci ir3_key.view_zero = !(outputs_written & VARYING_BIT_VIEWPORT); 2960bf215546Sopenharmony_ci 2961bf215546Sopenharmony_ci compiled_shaders = 2962bf215546Sopenharmony_ci tu_shaders_init(builder->device, &pipeline_sha1, sizeof(pipeline_sha1)); 2963bf215546Sopenharmony_ci 2964bf215546Sopenharmony_ci if (!compiled_shaders) { 2965bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 2966bf215546Sopenharmony_ci goto fail; 2967bf215546Sopenharmony_ci } 2968bf215546Sopenharmony_ci 2969bf215546Sopenharmony_ci compiled_shaders->active_desc_sets = desc_sets; 2970bf215546Sopenharmony_ci compiled_shaders->multi_pos_output = 2971bf215546Sopenharmony_ci shaders[MESA_SHADER_VERTEX]->multi_pos_output; 2972bf215546Sopenharmony_ci 2973bf215546Sopenharmony_ci for (gl_shader_stage stage = MESA_SHADER_VERTEX; 2974bf215546Sopenharmony_ci stage < ARRAY_SIZE(shaders); stage++) { 2975bf215546Sopenharmony_ci if (!shaders[stage]) 2976bf215546Sopenharmony_ci continue; 2977bf215546Sopenharmony_ci 2978bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 
2979bf215546Sopenharmony_ci 2980bf215546Sopenharmony_ci compiled_shaders->variants[stage] = 2981bf215546Sopenharmony_ci ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key, 2982bf215546Sopenharmony_ci executable_info); 2983bf215546Sopenharmony_ci if (!compiled_shaders->variants[stage]) 2984bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 2985bf215546Sopenharmony_ci 2986bf215546Sopenharmony_ci compiled_shaders->push_consts[stage] = shaders[stage]->push_consts; 2987bf215546Sopenharmony_ci 2988bf215546Sopenharmony_ci stage_feedbacks[stage].duration += os_time_get_nano() - stage_start; 2989bf215546Sopenharmony_ci } 2990bf215546Sopenharmony_ci 2991bf215546Sopenharmony_ci uint32_t safe_constlens = ir3_trim_constlen(compiled_shaders->variants, compiler); 2992bf215546Sopenharmony_ci 2993bf215546Sopenharmony_ci ir3_key.safe_constlen = true; 2994bf215546Sopenharmony_ci 2995bf215546Sopenharmony_ci for (gl_shader_stage stage = MESA_SHADER_VERTEX; 2996bf215546Sopenharmony_ci stage < ARRAY_SIZE(shaders); stage++) { 2997bf215546Sopenharmony_ci if (!shaders[stage]) 2998bf215546Sopenharmony_ci continue; 2999bf215546Sopenharmony_ci 3000bf215546Sopenharmony_ci if (safe_constlens & (1 << stage)) { 3001bf215546Sopenharmony_ci int64_t stage_start = os_time_get_nano(); 3002bf215546Sopenharmony_ci 3003bf215546Sopenharmony_ci ralloc_free(compiled_shaders->variants[stage]); 3004bf215546Sopenharmony_ci compiled_shaders->variants[stage] = 3005bf215546Sopenharmony_ci ir3_shader_create_variant(shaders[stage]->ir3_shader, &ir3_key, 3006bf215546Sopenharmony_ci executable_info); 3007bf215546Sopenharmony_ci if (!compiled_shaders->variants[stage]) { 3008bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 3009bf215546Sopenharmony_ci goto fail; 3010bf215546Sopenharmony_ci } 3011bf215546Sopenharmony_ci 3012bf215546Sopenharmony_ci stage_feedbacks[stage].duration += os_time_get_nano() - stage_start; 3013bf215546Sopenharmony_ci } 3014bf215546Sopenharmony_ci } 
3015bf215546Sopenharmony_ci 3016bf215546Sopenharmony_ci for (gl_shader_stage stage = MESA_SHADER_VERTEX; 3017bf215546Sopenharmony_ci stage < ARRAY_SIZE(nir); stage++) { 3018bf215546Sopenharmony_ci if (shaders[stage]) { 3019bf215546Sopenharmony_ci tu_shader_destroy(builder->device, shaders[stage], builder->alloc); 3020bf215546Sopenharmony_ci } 3021bf215546Sopenharmony_ci } 3022bf215546Sopenharmony_ci 3023bf215546Sopenharmony_ci compiled_shaders = 3024bf215546Sopenharmony_ci tu_pipeline_cache_insert(builder->cache, compiled_shaders); 3025bf215546Sopenharmony_ci 3026bf215546Sopenharmony_cidone: 3027bf215546Sopenharmony_ci for (gl_shader_stage stage = MESA_SHADER_VERTEX; 3028bf215546Sopenharmony_ci stage < ARRAY_SIZE(nir); stage++) { 3029bf215546Sopenharmony_ci if (compiled_shaders->variants[stage]) { 3030bf215546Sopenharmony_ci tu_append_executable(pipeline, compiled_shaders->variants[stage], 3031bf215546Sopenharmony_ci nir_initial_disasm[stage]); 3032bf215546Sopenharmony_ci } 3033bf215546Sopenharmony_ci } 3034bf215546Sopenharmony_ci 3035bf215546Sopenharmony_ci struct ir3_shader_variant *vs = 3036bf215546Sopenharmony_ci compiled_shaders->variants[MESA_SHADER_VERTEX]; 3037bf215546Sopenharmony_ci 3038bf215546Sopenharmony_ci struct ir3_shader_variant *variant; 3039bf215546Sopenharmony_ci if (!vs->stream_output.num_outputs && ir3_has_binning_vs(&vs->key)) { 3040bf215546Sopenharmony_ci tu_append_executable(pipeline, vs->binning, NULL); 3041bf215546Sopenharmony_ci variant = vs->binning; 3042bf215546Sopenharmony_ci } else { 3043bf215546Sopenharmony_ci variant = vs; 3044bf215546Sopenharmony_ci } 3045bf215546Sopenharmony_ci 3046bf215546Sopenharmony_ci builder->binning_variant = variant; 3047bf215546Sopenharmony_ci 3048bf215546Sopenharmony_ci builder->shaders = compiled_shaders; 3049bf215546Sopenharmony_ci 3050bf215546Sopenharmony_ci pipeline->active_desc_sets = compiled_shaders->active_desc_sets; 3051bf215546Sopenharmony_ci if 
(compiled_shaders->variants[MESA_SHADER_TESS_CTRL]) { 3052bf215546Sopenharmony_ci pipeline->tess.patch_type = 3053bf215546Sopenharmony_ci compiled_shaders->variants[MESA_SHADER_TESS_CTRL]->key.tessellation; 3054bf215546Sopenharmony_ci } 3055bf215546Sopenharmony_ci 3056bf215546Sopenharmony_ci pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 3057bf215546Sopenharmony_ci if (creation_feedback) { 3058bf215546Sopenharmony_ci *creation_feedback->pPipelineCreationFeedback = pipeline_feedback; 3059bf215546Sopenharmony_ci 3060bf215546Sopenharmony_ci assert(builder->create_info->stageCount == 3061bf215546Sopenharmony_ci creation_feedback->pipelineStageCreationFeedbackCount); 3062bf215546Sopenharmony_ci for (uint32_t i = 0; i < builder->create_info->stageCount; i++) { 3063bf215546Sopenharmony_ci gl_shader_stage s = 3064bf215546Sopenharmony_ci vk_to_mesa_shader_stage(builder->create_info->pStages[i].stage); 3065bf215546Sopenharmony_ci creation_feedback->pPipelineStageCreationFeedbacks[i] = stage_feedbacks[s]; 3066bf215546Sopenharmony_ci } 3067bf215546Sopenharmony_ci } 3068bf215546Sopenharmony_ci 3069bf215546Sopenharmony_ci return VK_SUCCESS; 3070bf215546Sopenharmony_ci 3071bf215546Sopenharmony_cifail: 3072bf215546Sopenharmony_ci for (gl_shader_stage stage = MESA_SHADER_VERTEX; 3073bf215546Sopenharmony_ci stage < ARRAY_SIZE(nir); stage++) { 3074bf215546Sopenharmony_ci if (shaders[stage]) { 3075bf215546Sopenharmony_ci tu_shader_destroy(builder->device, shaders[stage], builder->alloc); 3076bf215546Sopenharmony_ci } 3077bf215546Sopenharmony_ci } 3078bf215546Sopenharmony_ci 3079bf215546Sopenharmony_ci if (compiled_shaders) 3080bf215546Sopenharmony_ci vk_pipeline_cache_object_unref(&compiled_shaders->base); 3081bf215546Sopenharmony_ci 3082bf215546Sopenharmony_ci return result; 3083bf215546Sopenharmony_ci} 3084bf215546Sopenharmony_ci 3085bf215546Sopenharmony_cistatic void 3086bf215546Sopenharmony_citu_pipeline_builder_parse_dynamic(struct tu_pipeline_builder *builder, 
3087bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 3088bf215546Sopenharmony_ci{ 3089bf215546Sopenharmony_ci const VkPipelineDynamicStateCreateInfo *dynamic_info = 3090bf215546Sopenharmony_ci builder->create_info->pDynamicState; 3091bf215546Sopenharmony_ci 3092bf215546Sopenharmony_ci pipeline->gras_su_cntl_mask = ~0u; 3093bf215546Sopenharmony_ci pipeline->rb_depth_cntl_mask = ~0u; 3094bf215546Sopenharmony_ci pipeline->rb_stencil_cntl_mask = ~0u; 3095bf215546Sopenharmony_ci pipeline->pc_raster_cntl_mask = ~0u; 3096bf215546Sopenharmony_ci pipeline->vpc_unknown_9107_mask = ~0u; 3097bf215546Sopenharmony_ci pipeline->sp_blend_cntl_mask = ~0u; 3098bf215546Sopenharmony_ci pipeline->rb_blend_cntl_mask = ~0u; 3099bf215546Sopenharmony_ci pipeline->rb_mrt_control_mask = ~0u; 3100bf215546Sopenharmony_ci 3101bf215546Sopenharmony_ci if (!dynamic_info) 3102bf215546Sopenharmony_ci return; 3103bf215546Sopenharmony_ci 3104bf215546Sopenharmony_ci for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) { 3105bf215546Sopenharmony_ci VkDynamicState state = dynamic_info->pDynamicStates[i]; 3106bf215546Sopenharmony_ci switch (state) { 3107bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_VIEWPORT ... 
VK_DYNAMIC_STATE_STENCIL_REFERENCE: 3108bf215546Sopenharmony_ci if (state == VK_DYNAMIC_STATE_LINE_WIDTH) 3109bf215546Sopenharmony_ci pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_LINEHALFWIDTH__MASK; 3110bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(state); 3111bf215546Sopenharmony_ci break; 3112bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT: 3113bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_SAMPLE_LOCATIONS); 3114bf215546Sopenharmony_ci break; 3115bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_CULL_MODE: 3116bf215546Sopenharmony_ci pipeline->gras_su_cntl_mask &= 3117bf215546Sopenharmony_ci ~(A6XX_GRAS_SU_CNTL_CULL_BACK | A6XX_GRAS_SU_CNTL_CULL_FRONT); 3118bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL); 3119bf215546Sopenharmony_ci break; 3120bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_FRONT_FACE: 3121bf215546Sopenharmony_ci pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_FRONT_CW; 3122bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL); 3123bf215546Sopenharmony_ci break; 3124bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY: 3125bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_TOPOLOGY); 3126bf215546Sopenharmony_ci break; 3127bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_VERTEX_INPUT_BINDING_STRIDE: 3128bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_VB_STRIDE); 3129bf215546Sopenharmony_ci break; 3130bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT: 3131bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_VIEWPORT); 3132bf215546Sopenharmony_ci break; 3133bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT: 3134bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(VK_DYNAMIC_STATE_SCISSOR); 3135bf215546Sopenharmony_ci break; 3136bf215546Sopenharmony_ci case 
VK_DYNAMIC_STATE_DEPTH_TEST_ENABLE: 3137bf215546Sopenharmony_ci pipeline->rb_depth_cntl_mask &= 3138bf215546Sopenharmony_ci ~(A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE); 3139bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL); 3140bf215546Sopenharmony_ci break; 3141bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_WRITE_ENABLE: 3142bf215546Sopenharmony_ci pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; 3143bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL); 3144bf215546Sopenharmony_ci break; 3145bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_COMPARE_OP: 3146bf215546Sopenharmony_ci pipeline->rb_depth_cntl_mask &= ~A6XX_RB_DEPTH_CNTL_ZFUNC__MASK; 3147bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL); 3148bf215546Sopenharmony_ci break; 3149bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_BOUNDS_TEST_ENABLE: 3150bf215546Sopenharmony_ci pipeline->rb_depth_cntl_mask &= 3151bf215546Sopenharmony_ci ~(A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE); 3152bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL); 3153bf215546Sopenharmony_ci break; 3154bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_TEST_ENABLE: 3155bf215546Sopenharmony_ci pipeline->rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | 3156bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | 3157bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_STENCIL_READ); 3158bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL); 3159bf215546Sopenharmony_ci break; 3160bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_STENCIL_OP: 3161bf215546Sopenharmony_ci pipeline->rb_stencil_cntl_mask &= ~(A6XX_RB_STENCIL_CONTROL_FUNC__MASK | 3162bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_FAIL__MASK | 3163bf215546Sopenharmony_ci 
A6XX_RB_STENCIL_CONTROL_ZPASS__MASK | 3164bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_ZFAIL__MASK | 3165bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_FUNC_BF__MASK | 3166bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_FAIL_BF__MASK | 3167bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_ZPASS_BF__MASK | 3168bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_ZFAIL_BF__MASK); 3169bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RB_STENCIL_CNTL); 3170bf215546Sopenharmony_ci break; 3171bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_DEPTH_BIAS_ENABLE: 3172bf215546Sopenharmony_ci pipeline->gras_su_cntl_mask &= ~A6XX_GRAS_SU_CNTL_POLY_OFFSET; 3173bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_GRAS_SU_CNTL); 3174bf215546Sopenharmony_ci break; 3175bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE: 3176bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_PRIMITIVE_RESTART_ENABLE); 3177bf215546Sopenharmony_ci break; 3178bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE: 3179bf215546Sopenharmony_ci pipeline->pc_raster_cntl_mask &= ~A6XX_PC_RASTER_CNTL_DISCARD; 3180bf215546Sopenharmony_ci pipeline->vpc_unknown_9107_mask &= ~A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD; 3181bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_RASTERIZER_DISCARD); 3182bf215546Sopenharmony_ci break; 3183bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_LOGIC_OP_EXT: 3184bf215546Sopenharmony_ci pipeline->sp_blend_cntl_mask &= ~A6XX_SP_BLEND_CNTL_ENABLE_BLEND__MASK; 3185bf215546Sopenharmony_ci pipeline->rb_blend_cntl_mask &= ~A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; 3186bf215546Sopenharmony_ci pipeline->rb_mrt_control_mask &= ~A6XX_RB_MRT_CONTROL_ROP_CODE__MASK; 3187bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_BLEND); 3188bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_LOGIC_OP); 3189bf215546Sopenharmony_ci 
break; 3190bf215546Sopenharmony_ci case VK_DYNAMIC_STATE_COLOR_WRITE_ENABLE_EXT: 3191bf215546Sopenharmony_ci pipeline->sp_blend_cntl_mask &= ~A6XX_SP_BLEND_CNTL_ENABLE_BLEND__MASK; 3192bf215546Sopenharmony_ci pipeline->rb_blend_cntl_mask &= ~A6XX_RB_BLEND_CNTL_ENABLE_BLEND__MASK; 3193bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_BLEND); 3194bf215546Sopenharmony_ci 3195bf215546Sopenharmony_ci /* Dynamic color write enable doesn't directly change any of the 3196bf215546Sopenharmony_ci * registers, but it causes us to make some of the registers 0, so we 3197bf215546Sopenharmony_ci * set this dynamic state instead of making the register dynamic. 3198bf215546Sopenharmony_ci */ 3199bf215546Sopenharmony_ci pipeline->dynamic_state_mask |= BIT(TU_DYNAMIC_STATE_COLOR_WRITE_ENABLE); 3200bf215546Sopenharmony_ci break; 3201bf215546Sopenharmony_ci default: 3202bf215546Sopenharmony_ci assert(!"unsupported dynamic state"); 3203bf215546Sopenharmony_ci break; 3204bf215546Sopenharmony_ci } 3205bf215546Sopenharmony_ci } 3206bf215546Sopenharmony_ci} 3207bf215546Sopenharmony_ci 3208bf215546Sopenharmony_cistatic void 3209bf215546Sopenharmony_citu_pipeline_set_linkage(struct tu_program_descriptor_linkage *link, 3210bf215546Sopenharmony_ci struct tu_push_constant_range *push_consts, 3211bf215546Sopenharmony_ci struct ir3_shader_variant *v) 3212bf215546Sopenharmony_ci{ 3213bf215546Sopenharmony_ci link->const_state = *ir3_const_state(v); 3214bf215546Sopenharmony_ci link->constlen = v->constlen; 3215bf215546Sopenharmony_ci link->push_consts = *push_consts; 3216bf215546Sopenharmony_ci} 3217bf215546Sopenharmony_ci 3218bf215546Sopenharmony_cistatic void 3219bf215546Sopenharmony_citu_pipeline_builder_parse_shader_stages(struct tu_pipeline_builder *builder, 3220bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 3221bf215546Sopenharmony_ci{ 3222bf215546Sopenharmony_ci struct tu_cs prog_cs; 3223bf215546Sopenharmony_ci 3224bf215546Sopenharmony_ci /* Emit 
HLSQ_xS_CNTL/HLSQ_SP_xS_CONFIG *first*, before emitting anything 3225bf215546Sopenharmony_ci * else that could depend on that state (like push constants) 3226bf215546Sopenharmony_ci * 3227bf215546Sopenharmony_ci * Note also that this always uses the full VS even in binning pass. The 3228bf215546Sopenharmony_ci * binning pass variant has the same const layout as the full VS, and 3229bf215546Sopenharmony_ci * the constlen for the VS will be the same or greater than the constlen 3230bf215546Sopenharmony_ci * for the binning pass variant. It is required that the constlen state 3231bf215546Sopenharmony_ci * matches between binning and draw passes, as some parts of the push 3232bf215546Sopenharmony_ci * consts are emitted in state groups that are shared between the binning 3233bf215546Sopenharmony_ci * and draw passes. 3234bf215546Sopenharmony_ci */ 3235bf215546Sopenharmony_ci tu_cs_begin_sub_stream(&pipeline->cs, 512, &prog_cs); 3236bf215546Sopenharmony_ci tu6_emit_program_config(&prog_cs, builder); 3237bf215546Sopenharmony_ci pipeline->program.config_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); 3238bf215546Sopenharmony_ci 3239bf215546Sopenharmony_ci tu_cs_begin_sub_stream(&pipeline->cs, 512 + builder->additional_cs_reserve_size, &prog_cs); 3240bf215546Sopenharmony_ci tu6_emit_program(&prog_cs, builder, false, pipeline); 3241bf215546Sopenharmony_ci pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); 3242bf215546Sopenharmony_ci 3243bf215546Sopenharmony_ci tu_cs_begin_sub_stream(&pipeline->cs, 512 + builder->additional_cs_reserve_size, &prog_cs); 3244bf215546Sopenharmony_ci tu6_emit_program(&prog_cs, builder, true, pipeline); 3245bf215546Sopenharmony_ci pipeline->program.binning_state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); 3246bf215546Sopenharmony_ci 3247bf215546Sopenharmony_ci VkShaderStageFlags stages = 0; 3248bf215546Sopenharmony_ci for (unsigned i = 0; i < builder->create_info->stageCount; i++) { 3249bf215546Sopenharmony_ci 
stages |= builder->create_info->pStages[i].stage; 3250bf215546Sopenharmony_ci } 3251bf215546Sopenharmony_ci pipeline->active_stages = stages; 3252bf215546Sopenharmony_ci 3253bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { 3254bf215546Sopenharmony_ci if (!builder->shaders->variants[i]) 3255bf215546Sopenharmony_ci continue; 3256bf215546Sopenharmony_ci 3257bf215546Sopenharmony_ci tu_pipeline_set_linkage(&pipeline->program.link[i], 3258bf215546Sopenharmony_ci &builder->shaders->push_consts[i], 3259bf215546Sopenharmony_ci builder->shaders->variants[i]); 3260bf215546Sopenharmony_ci } 3261bf215546Sopenharmony_ci} 3262bf215546Sopenharmony_ci 3263bf215546Sopenharmony_cistatic void 3264bf215546Sopenharmony_citu_pipeline_builder_parse_vertex_input(struct tu_pipeline_builder *builder, 3265bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 3266bf215546Sopenharmony_ci{ 3267bf215546Sopenharmony_ci const VkPipelineVertexInputStateCreateInfo *vi_info = 3268bf215546Sopenharmony_ci builder->create_info->pVertexInputState; 3269bf215546Sopenharmony_ci const struct ir3_shader_variant *vs = builder->shaders->variants[MESA_SHADER_VERTEX]; 3270bf215546Sopenharmony_ci const struct ir3_shader_variant *bs = builder->binning_variant; 3271bf215546Sopenharmony_ci 3272bf215546Sopenharmony_ci /* Bindings may contain holes */ 3273bf215546Sopenharmony_ci for (unsigned i = 0; i < vi_info->vertexBindingDescriptionCount; i++) { 3274bf215546Sopenharmony_ci pipeline->num_vbs = 3275bf215546Sopenharmony_ci MAX2(pipeline->num_vbs, vi_info->pVertexBindingDescriptions[i].binding + 1); 3276bf215546Sopenharmony_ci } 3277bf215546Sopenharmony_ci 3278bf215546Sopenharmony_ci tu6_emit_vertex_input(pipeline, &pipeline->vi.state, vs, vi_info); 3279bf215546Sopenharmony_ci if (bs) 3280bf215546Sopenharmony_ci tu6_emit_vertex_input(pipeline, &pipeline->vi.binning_state, bs, vi_info); 3281bf215546Sopenharmony_ci} 3282bf215546Sopenharmony_ci 
3283bf215546Sopenharmony_cistatic void 3284bf215546Sopenharmony_citu_pipeline_builder_parse_input_assembly(struct tu_pipeline_builder *builder, 3285bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 3286bf215546Sopenharmony_ci{ 3287bf215546Sopenharmony_ci const VkPipelineInputAssemblyStateCreateInfo *ia_info = 3288bf215546Sopenharmony_ci builder->create_info->pInputAssemblyState; 3289bf215546Sopenharmony_ci 3290bf215546Sopenharmony_ci pipeline->ia.primtype = tu6_primtype(ia_info->topology); 3291bf215546Sopenharmony_ci pipeline->ia.primitive_restart = ia_info->primitiveRestartEnable; 3292bf215546Sopenharmony_ci} 3293bf215546Sopenharmony_ci 3294bf215546Sopenharmony_cistatic bool 3295bf215546Sopenharmony_citu_pipeline_static_state(struct tu_pipeline *pipeline, struct tu_cs *cs, 3296bf215546Sopenharmony_ci uint32_t id, uint32_t size) 3297bf215546Sopenharmony_ci{ 3298bf215546Sopenharmony_ci assert(id < ARRAY_SIZE(pipeline->dynamic_state)); 3299bf215546Sopenharmony_ci 3300bf215546Sopenharmony_ci if (pipeline->dynamic_state_mask & BIT(id)) 3301bf215546Sopenharmony_ci return false; 3302bf215546Sopenharmony_ci 3303bf215546Sopenharmony_ci pipeline->dynamic_state[id] = tu_cs_draw_state(&pipeline->cs, cs, size); 3304bf215546Sopenharmony_ci return true; 3305bf215546Sopenharmony_ci} 3306bf215546Sopenharmony_ci 3307bf215546Sopenharmony_cistatic void 3308bf215546Sopenharmony_citu_pipeline_builder_parse_tessellation(struct tu_pipeline_builder *builder, 3309bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 3310bf215546Sopenharmony_ci{ 3311bf215546Sopenharmony_ci if (!(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) || 3312bf215546Sopenharmony_ci !(pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) 3313bf215546Sopenharmony_ci return; 3314bf215546Sopenharmony_ci 3315bf215546Sopenharmony_ci const VkPipelineTessellationStateCreateInfo *tess_info = 3316bf215546Sopenharmony_ci builder->create_info->pTessellationState; 
3317bf215546Sopenharmony_ci 3318bf215546Sopenharmony_ci assert(pipeline->ia.primtype == DI_PT_PATCHES0); 3319bf215546Sopenharmony_ci assert(tess_info->patchControlPoints <= 32); 3320bf215546Sopenharmony_ci pipeline->ia.primtype += tess_info->patchControlPoints; 3321bf215546Sopenharmony_ci const VkPipelineTessellationDomainOriginStateCreateInfo *domain_info = 3322bf215546Sopenharmony_ci vk_find_struct_const(tess_info->pNext, PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO); 3323bf215546Sopenharmony_ci pipeline->tess.upper_left_domain_origin = !domain_info || 3324bf215546Sopenharmony_ci domain_info->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT; 3325bf215546Sopenharmony_ci const struct ir3_shader_variant *hs = builder->shaders->variants[MESA_SHADER_TESS_CTRL]; 3326bf215546Sopenharmony_ci pipeline->tess.param_stride = hs->output_size * 4; 3327bf215546Sopenharmony_ci} 3328bf215546Sopenharmony_ci 3329bf215546Sopenharmony_cistatic void 3330bf215546Sopenharmony_citu_pipeline_builder_parse_viewport(struct tu_pipeline_builder *builder, 3331bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 3332bf215546Sopenharmony_ci{ 3333bf215546Sopenharmony_ci /* The spec says: 3334bf215546Sopenharmony_ci * 3335bf215546Sopenharmony_ci * pViewportState is a pointer to an instance of the 3336bf215546Sopenharmony_ci * VkPipelineViewportStateCreateInfo structure, and is ignored if the 3337bf215546Sopenharmony_ci * pipeline has rasterization disabled." 3338bf215546Sopenharmony_ci * 3339bf215546Sopenharmony_ci * We leave the relevant registers stale in that case. 
3340bf215546Sopenharmony_ci */ 3341bf215546Sopenharmony_ci if (builder->rasterizer_discard) 3342bf215546Sopenharmony_ci return; 3343bf215546Sopenharmony_ci 3344bf215546Sopenharmony_ci const VkPipelineViewportStateCreateInfo *vp_info = 3345bf215546Sopenharmony_ci builder->create_info->pViewportState; 3346bf215546Sopenharmony_ci const VkPipelineViewportDepthClipControlCreateInfoEXT *depth_clip_info = 3347bf215546Sopenharmony_ci vk_find_struct_const(vp_info->pNext, PIPELINE_VIEWPORT_DEPTH_CLIP_CONTROL_CREATE_INFO_EXT); 3348bf215546Sopenharmony_ci pipeline->z_negative_one_to_one = depth_clip_info ? depth_clip_info->negativeOneToOne : false; 3349bf215546Sopenharmony_ci 3350bf215546Sopenharmony_ci struct tu_cs cs; 3351bf215546Sopenharmony_ci 3352bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_VIEWPORT, 8 + 10 * vp_info->viewportCount)) 3353bf215546Sopenharmony_ci tu6_emit_viewport(&cs, vp_info->pViewports, vp_info->viewportCount, pipeline->z_negative_one_to_one); 3354bf215546Sopenharmony_ci 3355bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_SCISSOR, 1 + 2 * vp_info->scissorCount)) 3356bf215546Sopenharmony_ci tu6_emit_scissor(&cs, vp_info->pScissors, vp_info->scissorCount); 3357bf215546Sopenharmony_ci} 3358bf215546Sopenharmony_ci 3359bf215546Sopenharmony_cistatic void 3360bf215546Sopenharmony_citu_pipeline_builder_parse_rasterization(struct tu_pipeline_builder *builder, 3361bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 3362bf215546Sopenharmony_ci{ 3363bf215546Sopenharmony_ci const VkPipelineRasterizationStateCreateInfo *rast_info = 3364bf215546Sopenharmony_ci builder->create_info->pRasterizationState; 3365bf215546Sopenharmony_ci 3366bf215546Sopenharmony_ci enum a6xx_polygon_mode mode = tu6_polygon_mode(rast_info->polygonMode); 3367bf215546Sopenharmony_ci 3368bf215546Sopenharmony_ci builder->depth_clip_disable = rast_info->depthClampEnable; 3369bf215546Sopenharmony_ci 
3370bf215546Sopenharmony_ci const VkPipelineRasterizationDepthClipStateCreateInfoEXT *depth_clip_state = 3371bf215546Sopenharmony_ci vk_find_struct_const(rast_info, PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT); 3372bf215546Sopenharmony_ci if (depth_clip_state) 3373bf215546Sopenharmony_ci builder->depth_clip_disable = !depth_clip_state->depthClipEnable; 3374bf215546Sopenharmony_ci 3375bf215546Sopenharmony_ci pipeline->line_mode = RECTANGULAR; 3376bf215546Sopenharmony_ci 3377bf215546Sopenharmony_ci if (tu6_primtype_line(pipeline->ia.primtype) || 3378bf215546Sopenharmony_ci (tu6_primtype_patches(pipeline->ia.primtype) && 3379bf215546Sopenharmony_ci pipeline->tess.patch_type == IR3_TESS_ISOLINES)) { 3380bf215546Sopenharmony_ci const VkPipelineRasterizationLineStateCreateInfoEXT *rast_line_state = 3381bf215546Sopenharmony_ci vk_find_struct_const(rast_info->pNext, 3382bf215546Sopenharmony_ci PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT); 3383bf215546Sopenharmony_ci 3384bf215546Sopenharmony_ci if (rast_line_state && rast_line_state->lineRasterizationMode == 3385bf215546Sopenharmony_ci VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT) { 3386bf215546Sopenharmony_ci pipeline->line_mode = BRESENHAM; 3387bf215546Sopenharmony_ci } 3388bf215546Sopenharmony_ci } 3389bf215546Sopenharmony_ci 3390bf215546Sopenharmony_ci struct tu_cs cs; 3391bf215546Sopenharmony_ci uint32_t cs_size = 9 + 3392bf215546Sopenharmony_ci (builder->device->physical_device->info->a6xx.has_shading_rate ? 8 : 0) + 3393bf215546Sopenharmony_ci (builder->emit_msaa_state ? 
11 : 0); 3394bf215546Sopenharmony_ci pipeline->rast_state = tu_cs_draw_state(&pipeline->cs, &cs, cs_size); 3395bf215546Sopenharmony_ci 3396bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, 3397bf215546Sopenharmony_ci A6XX_GRAS_CL_CNTL( 3398bf215546Sopenharmony_ci .znear_clip_disable = builder->depth_clip_disable, 3399bf215546Sopenharmony_ci .zfar_clip_disable = builder->depth_clip_disable, 3400bf215546Sopenharmony_ci /* TODO should this be depth_clip_disable instead? */ 3401bf215546Sopenharmony_ci .unk5 = rast_info->depthClampEnable, 3402bf215546Sopenharmony_ci .zero_gb_scale_z = pipeline->z_negative_one_to_one ? 0 : 1, 3403bf215546Sopenharmony_ci .vp_clip_code_ignore = 1)); 3404bf215546Sopenharmony_ci 3405bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, 3406bf215546Sopenharmony_ci A6XX_VPC_POLYGON_MODE(mode)); 3407bf215546Sopenharmony_ci 3408bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, 3409bf215546Sopenharmony_ci A6XX_PC_POLYGON_MODE(mode)); 3410bf215546Sopenharmony_ci 3411bf215546Sopenharmony_ci /* move to hw ctx init? */ 3412bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, 3413bf215546Sopenharmony_ci A6XX_GRAS_SU_POINT_MINMAX(.min = 1.0f / 16.0f, .max = 4092.0f), 3414bf215546Sopenharmony_ci A6XX_GRAS_SU_POINT_SIZE(1.0f)); 3415bf215546Sopenharmony_ci 3416bf215546Sopenharmony_ci if (builder->device->physical_device->info->a6xx.has_shading_rate) { 3417bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A00()); 3418bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A10()); 3419bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A20()); 3420bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_RB_UNKNOWN_8A30()); 3421bf215546Sopenharmony_ci } 3422bf215546Sopenharmony_ci 3423bf215546Sopenharmony_ci /* If samples count couldn't be devised from the subpass, we should emit it here. 3424bf215546Sopenharmony_ci * It happens when subpass doesn't use any color/depth attachment. 
3425bf215546Sopenharmony_ci */ 3426bf215546Sopenharmony_ci if (builder->emit_msaa_state) 3427bf215546Sopenharmony_ci tu6_emit_msaa(&cs, builder->samples, pipeline->line_mode); 3428bf215546Sopenharmony_ci 3429bf215546Sopenharmony_ci const VkPipelineRasterizationStateStreamCreateInfoEXT *stream_info = 3430bf215546Sopenharmony_ci vk_find_struct_const(rast_info->pNext, 3431bf215546Sopenharmony_ci PIPELINE_RASTERIZATION_STATE_STREAM_CREATE_INFO_EXT); 3432bf215546Sopenharmony_ci unsigned stream = stream_info ? stream_info->rasterizationStream : 0; 3433bf215546Sopenharmony_ci 3434bf215546Sopenharmony_ci pipeline->pc_raster_cntl = A6XX_PC_RASTER_CNTL_STREAM(stream); 3435bf215546Sopenharmony_ci pipeline->vpc_unknown_9107 = 0; 3436bf215546Sopenharmony_ci if (rast_info->rasterizerDiscardEnable) { 3437bf215546Sopenharmony_ci pipeline->pc_raster_cntl |= A6XX_PC_RASTER_CNTL_DISCARD; 3438bf215546Sopenharmony_ci pipeline->vpc_unknown_9107 |= A6XX_VPC_UNKNOWN_9107_RASTER_DISCARD; 3439bf215546Sopenharmony_ci } 3440bf215546Sopenharmony_ci 3441bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RASTERIZER_DISCARD, 4)) { 3442bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_PC_RASTER_CNTL(.dword = pipeline->pc_raster_cntl)); 3443bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_VPC_UNKNOWN_9107(.dword = pipeline->vpc_unknown_9107)); 3444bf215546Sopenharmony_ci } 3445bf215546Sopenharmony_ci 3446bf215546Sopenharmony_ci pipeline->gras_su_cntl = 3447bf215546Sopenharmony_ci tu6_gras_su_cntl(rast_info, pipeline->line_mode, builder->multiview_mask != 0); 3448bf215546Sopenharmony_ci 3449bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_GRAS_SU_CNTL, 2)) 3450bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_GRAS_SU_CNTL(.dword = pipeline->gras_su_cntl)); 3451bf215546Sopenharmony_ci 3452bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BIAS, 4)) { 3453bf215546Sopenharmony_ci 
tu6_emit_depth_bias(&cs, rast_info->depthBiasConstantFactor, 3454bf215546Sopenharmony_ci rast_info->depthBiasClamp, 3455bf215546Sopenharmony_ci rast_info->depthBiasSlopeFactor); 3456bf215546Sopenharmony_ci } 3457bf215546Sopenharmony_ci 3458bf215546Sopenharmony_ci const struct VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *provoking_vtx_state = 3459bf215546Sopenharmony_ci vk_find_struct_const(rast_info->pNext, PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT); 3460bf215546Sopenharmony_ci pipeline->provoking_vertex_last = provoking_vtx_state && 3461bf215546Sopenharmony_ci provoking_vtx_state->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT; 3462bf215546Sopenharmony_ci} 3463bf215546Sopenharmony_ci 3464bf215546Sopenharmony_cistatic void 3465bf215546Sopenharmony_citu_pipeline_builder_parse_depth_stencil(struct tu_pipeline_builder *builder, 3466bf215546Sopenharmony_ci struct tu_pipeline *pipeline) 3467bf215546Sopenharmony_ci{ 3468bf215546Sopenharmony_ci /* The spec says: 3469bf215546Sopenharmony_ci * 3470bf215546Sopenharmony_ci * pDepthStencilState is a pointer to an instance of the 3471bf215546Sopenharmony_ci * VkPipelineDepthStencilStateCreateInfo structure, and is ignored if 3472bf215546Sopenharmony_ci * the pipeline has rasterization disabled or if the subpass of the 3473bf215546Sopenharmony_ci * render pass the pipeline is created against does not use a 3474bf215546Sopenharmony_ci * depth/stencil attachment. 
3475bf215546Sopenharmony_ci */ 3476bf215546Sopenharmony_ci const VkPipelineDepthStencilStateCreateInfo *ds_info = 3477bf215546Sopenharmony_ci builder->create_info->pDepthStencilState; 3478bf215546Sopenharmony_ci const enum pipe_format pipe_format = 3479bf215546Sopenharmony_ci vk_format_to_pipe_format(builder->depth_attachment_format); 3480bf215546Sopenharmony_ci uint32_t rb_depth_cntl = 0, rb_stencil_cntl = 0; 3481bf215546Sopenharmony_ci struct tu_cs cs; 3482bf215546Sopenharmony_ci 3483bf215546Sopenharmony_ci if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED && 3484bf215546Sopenharmony_ci builder->depth_attachment_format != VK_FORMAT_S8_UINT) { 3485bf215546Sopenharmony_ci if (ds_info->depthTestEnable) { 3486bf215546Sopenharmony_ci rb_depth_cntl |= 3487bf215546Sopenharmony_ci A6XX_RB_DEPTH_CNTL_Z_TEST_ENABLE | 3488bf215546Sopenharmony_ci A6XX_RB_DEPTH_CNTL_ZFUNC(tu6_compare_func(ds_info->depthCompareOp)) | 3489bf215546Sopenharmony_ci A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE; /* TODO: don't set for ALWAYS/NEVER */ 3490bf215546Sopenharmony_ci 3491bf215546Sopenharmony_ci if (builder->depth_clip_disable) 3492bf215546Sopenharmony_ci rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_CLIP_DISABLE; 3493bf215546Sopenharmony_ci 3494bf215546Sopenharmony_ci if (ds_info->depthWriteEnable) 3495bf215546Sopenharmony_ci rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_WRITE_ENABLE; 3496bf215546Sopenharmony_ci } 3497bf215546Sopenharmony_ci 3498bf215546Sopenharmony_ci if (ds_info->depthBoundsTestEnable) 3499bf215546Sopenharmony_ci rb_depth_cntl |= A6XX_RB_DEPTH_CNTL_Z_BOUNDS_ENABLE | A6XX_RB_DEPTH_CNTL_Z_READ_ENABLE; 3500bf215546Sopenharmony_ci 3501bf215546Sopenharmony_ci if (ds_info->depthBoundsTestEnable && !ds_info->depthTestEnable) 3502bf215546Sopenharmony_ci tu6_apply_depth_bounds_workaround(builder->device, &rb_depth_cntl); 3503bf215546Sopenharmony_ci 3504bf215546Sopenharmony_ci pipeline->depth_cpp_per_sample = util_format_get_component_bits( 3505bf215546Sopenharmony_ci pipe_format, 
UTIL_FORMAT_COLORSPACE_ZS, 0) / 8; 3506bf215546Sopenharmony_ci } else { 3507bf215546Sopenharmony_ci /* if RB_DEPTH_CNTL is set dynamically, we need to make sure it is set 3508bf215546Sopenharmony_ci * to 0 when this pipeline is used, as enabling depth test when there 3509bf215546Sopenharmony_ci * is no depth attachment is a problem (at least for the S8_UINT case) 3510bf215546Sopenharmony_ci */ 3511bf215546Sopenharmony_ci if (pipeline->dynamic_state_mask & BIT(TU_DYNAMIC_STATE_RB_DEPTH_CNTL)) 3512bf215546Sopenharmony_ci pipeline->rb_depth_cntl_disable = true; 3513bf215546Sopenharmony_ci } 3514bf215546Sopenharmony_ci 3515bf215546Sopenharmony_ci if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) { 3516bf215546Sopenharmony_ci const VkStencilOpState *front = &ds_info->front; 3517bf215546Sopenharmony_ci const VkStencilOpState *back = &ds_info->back; 3518bf215546Sopenharmony_ci 3519bf215546Sopenharmony_ci rb_stencil_cntl |= 3520bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_FUNC(tu6_compare_func(front->compareOp)) | 3521bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_FAIL(tu6_stencil_op(front->failOp)) | 3522bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_ZPASS(tu6_stencil_op(front->passOp)) | 3523bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_ZFAIL(tu6_stencil_op(front->depthFailOp)) | 3524bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_FUNC_BF(tu6_compare_func(back->compareOp)) | 3525bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_FAIL_BF(tu6_stencil_op(back->failOp)) | 3526bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_ZPASS_BF(tu6_stencil_op(back->passOp)) | 3527bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_ZFAIL_BF(tu6_stencil_op(back->depthFailOp)); 3528bf215546Sopenharmony_ci 3529bf215546Sopenharmony_ci if (ds_info->stencilTestEnable) { 3530bf215546Sopenharmony_ci rb_stencil_cntl |= 3531bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE | 3532bf215546Sopenharmony_ci A6XX_RB_STENCIL_CONTROL_STENCIL_ENABLE_BF | 3533bf215546Sopenharmony_ci 
A6XX_RB_STENCIL_CONTROL_STENCIL_READ; 3534bf215546Sopenharmony_ci } 3535bf215546Sopenharmony_ci 3536bf215546Sopenharmony_ci pipeline->stencil_cpp_per_sample = util_format_get_component_bits( 3537bf215546Sopenharmony_ci pipe_format, UTIL_FORMAT_COLORSPACE_ZS, 1) / 8; 3538bf215546Sopenharmony_ci } 3539bf215546Sopenharmony_ci 3540bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_DEPTH_CNTL, 2)) { 3541bf215546Sopenharmony_ci tu_cs_emit_pkt4(&cs, REG_A6XX_RB_DEPTH_CNTL, 1); 3542bf215546Sopenharmony_ci tu_cs_emit(&cs, rb_depth_cntl); 3543bf215546Sopenharmony_ci } 3544bf215546Sopenharmony_ci pipeline->rb_depth_cntl = rb_depth_cntl; 3545bf215546Sopenharmony_ci 3546bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_RB_STENCIL_CNTL, 2)) { 3547bf215546Sopenharmony_ci tu_cs_emit_pkt4(&cs, REG_A6XX_RB_STENCIL_CONTROL, 1); 3548bf215546Sopenharmony_ci tu_cs_emit(&cs, rb_stencil_cntl); 3549bf215546Sopenharmony_ci } 3550bf215546Sopenharmony_ci pipeline->rb_stencil_cntl = rb_stencil_cntl; 3551bf215546Sopenharmony_ci 3552bf215546Sopenharmony_ci /* the remaining draw states arent used if there is no d/s, leave them empty */ 3553bf215546Sopenharmony_ci if (builder->depth_attachment_format == VK_FORMAT_UNDEFINED) 3554bf215546Sopenharmony_ci return; 3555bf215546Sopenharmony_ci 3556bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_DEPTH_BOUNDS, 3)) { 3557bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, 3558bf215546Sopenharmony_ci A6XX_RB_Z_BOUNDS_MIN(ds_info->minDepthBounds), 3559bf215546Sopenharmony_ci A6XX_RB_Z_BOUNDS_MAX(ds_info->maxDepthBounds)); 3560bf215546Sopenharmony_ci } 3561bf215546Sopenharmony_ci 3562bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK, 2)) { 3563bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_RB_STENCILMASK(.mask = ds_info->front.compareMask & 0xff, 3564bf215546Sopenharmony_ci .bfmask = 
ds_info->back.compareMask & 0xff)); 3565bf215546Sopenharmony_ci } 3566bf215546Sopenharmony_ci 3567bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_WRITE_MASK, 2)) { 3568bf215546Sopenharmony_ci update_stencil_mask(&pipeline->stencil_wrmask, VK_STENCIL_FACE_FRONT_BIT, ds_info->front.writeMask); 3569bf215546Sopenharmony_ci update_stencil_mask(&pipeline->stencil_wrmask, VK_STENCIL_FACE_BACK_BIT, ds_info->back.writeMask); 3570bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_RB_STENCILWRMASK(.dword = pipeline->stencil_wrmask)); 3571bf215546Sopenharmony_ci } 3572bf215546Sopenharmony_ci 3573bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_STENCIL_REFERENCE, 2)) { 3574bf215546Sopenharmony_ci tu_cs_emit_regs(&cs, A6XX_RB_STENCILREF(.ref = ds_info->front.reference & 0xff, 3575bf215546Sopenharmony_ci .bfref = ds_info->back.reference & 0xff)); 3576bf215546Sopenharmony_ci } 3577bf215546Sopenharmony_ci 3578bf215546Sopenharmony_ci if (builder->shaders->variants[MESA_SHADER_FRAGMENT]) { 3579bf215546Sopenharmony_ci const struct ir3_shader_variant *fs = builder->shaders->variants[MESA_SHADER_FRAGMENT]; 3580bf215546Sopenharmony_ci if (fs->has_kill || builder->alpha_to_coverage) { 3581bf215546Sopenharmony_ci pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE; 3582bf215546Sopenharmony_ci } 3583bf215546Sopenharmony_ci if (fs->no_earlyz || fs->writes_pos) { 3584bf215546Sopenharmony_ci pipeline->lrz.force_disable_mask = TU_LRZ_FORCE_DISABLE_LRZ; 3585bf215546Sopenharmony_ci } 3586bf215546Sopenharmony_ci } 3587bf215546Sopenharmony_ci} 3588bf215546Sopenharmony_ci 3589bf215546Sopenharmony_cistatic void 3590bf215546Sopenharmony_citu_pipeline_builder_parse_multisample_and_color_blend( 3591bf215546Sopenharmony_ci struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) 3592bf215546Sopenharmony_ci{ 3593bf215546Sopenharmony_ci /* The spec says: 3594bf215546Sopenharmony_ci * 
3595bf215546Sopenharmony_ci * pMultisampleState is a pointer to an instance of the 3596bf215546Sopenharmony_ci * VkPipelineMultisampleStateCreateInfo, and is ignored if the pipeline 3597bf215546Sopenharmony_ci * has rasterization disabled. 3598bf215546Sopenharmony_ci * 3599bf215546Sopenharmony_ci * Also, 3600bf215546Sopenharmony_ci * 3601bf215546Sopenharmony_ci * pColorBlendState is a pointer to an instance of the 3602bf215546Sopenharmony_ci * VkPipelineColorBlendStateCreateInfo structure, and is ignored if the 3603bf215546Sopenharmony_ci * pipeline has rasterization disabled or if the subpass of the render 3604bf215546Sopenharmony_ci * pass the pipeline is created against does not use any color 3605bf215546Sopenharmony_ci * attachments. 3606bf215546Sopenharmony_ci * 3607bf215546Sopenharmony_ci * We leave the relevant registers stale when rasterization is disabled. 3608bf215546Sopenharmony_ci */ 3609bf215546Sopenharmony_ci if (builder->rasterizer_discard) 3610bf215546Sopenharmony_ci return; 3611bf215546Sopenharmony_ci 3612bf215546Sopenharmony_ci static const VkPipelineColorBlendStateCreateInfo dummy_blend_info; 3613bf215546Sopenharmony_ci const VkPipelineMultisampleStateCreateInfo *msaa_info = 3614bf215546Sopenharmony_ci builder->create_info->pMultisampleState; 3615bf215546Sopenharmony_ci const VkPipelineColorBlendStateCreateInfo *blend_info = 3616bf215546Sopenharmony_ci builder->use_color_attachments ? builder->create_info->pColorBlendState 3617bf215546Sopenharmony_ci : &dummy_blend_info; 3618bf215546Sopenharmony_ci 3619bf215546Sopenharmony_ci struct tu_cs cs; 3620bf215546Sopenharmony_ci tu6_emit_rb_mrt_controls(pipeline, blend_info, 3621bf215546Sopenharmony_ci builder->color_attachment_formats, 3622bf215546Sopenharmony_ci &pipeline->rop_reads_dst, 3623bf215546Sopenharmony_ci &pipeline->color_bandwidth_per_sample); 3624bf215546Sopenharmony_ci 3625bf215546Sopenharmony_ci uint32_t blend_enable_mask = 3626bf215546Sopenharmony_ci pipeline->rop_reads_dst ? 
pipeline->color_write_enable : pipeline->blend_enable; 3627bf215546Sopenharmony_ci tu6_emit_blend_control(pipeline, blend_enable_mask, 3628bf215546Sopenharmony_ci builder->use_dual_src_blend, msaa_info); 3629bf215546Sopenharmony_ci 3630bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_BLEND, 3631bf215546Sopenharmony_ci blend_info->attachmentCount * 3 + 4)) { 3632bf215546Sopenharmony_ci tu6_emit_blend(&cs, pipeline); 3633bf215546Sopenharmony_ci assert(cs.cur == cs.end); /* validate draw state size */ 3634bf215546Sopenharmony_ci } 3635bf215546Sopenharmony_ci 3636bf215546Sopenharmony_ci /* Disable LRZ writes when blend or logic op that reads the destination is 3637bf215546Sopenharmony_ci * enabled, since the resulting pixel value from the blend-draw depends on 3638bf215546Sopenharmony_ci * an earlier draw, which LRZ in the draw pass could early-reject if the 3639bf215546Sopenharmony_ci * previous blend-enabled draw wrote LRZ. 3640bf215546Sopenharmony_ci * 3641bf215546Sopenharmony_ci * TODO: We need to disable LRZ writes only for the binning pass. 3642bf215546Sopenharmony_ci * Therefore, we need to emit it in a separate draw state. We keep 3643bf215546Sopenharmony_ci * it disabled for sysmem path as well for the moment. 3644bf215546Sopenharmony_ci */ 3645bf215546Sopenharmony_ci if (blend_enable_mask) 3646bf215546Sopenharmony_ci pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE; 3647bf215546Sopenharmony_ci 3648bf215546Sopenharmony_ci for (int i = 0; i < blend_info->attachmentCount; i++) { 3649bf215546Sopenharmony_ci VkPipelineColorBlendAttachmentState blendAttachment = blend_info->pAttachments[i]; 3650bf215546Sopenharmony_ci /* From the PoV of LRZ, having masked color channels is 3651bf215546Sopenharmony_ci * the same as having blend enabled, in that the draw will 3652bf215546Sopenharmony_ci * care about the fragments from an earlier draw. 
3653bf215546Sopenharmony_ci */ 3654bf215546Sopenharmony_ci VkFormat format = builder->color_attachment_formats[i]; 3655bf215546Sopenharmony_ci unsigned mask = MASK(vk_format_get_nr_components(format)); 3656bf215546Sopenharmony_ci if (format != VK_FORMAT_UNDEFINED && 3657bf215546Sopenharmony_ci ((blendAttachment.colorWriteMask & mask) != mask || 3658bf215546Sopenharmony_ci !(pipeline->color_write_enable & BIT(i)))) { 3659bf215546Sopenharmony_ci pipeline->lrz.force_disable_mask |= TU_LRZ_FORCE_DISABLE_WRITE; 3660bf215546Sopenharmony_ci } 3661bf215546Sopenharmony_ci } 3662bf215546Sopenharmony_ci 3663bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, VK_DYNAMIC_STATE_BLEND_CONSTANTS, 5)) { 3664bf215546Sopenharmony_ci tu_cs_emit_pkt4(&cs, REG_A6XX_RB_BLEND_RED_F32, 4); 3665bf215546Sopenharmony_ci tu_cs_emit_array(&cs, (const uint32_t *) blend_info->blendConstants, 4); 3666bf215546Sopenharmony_ci } 3667bf215546Sopenharmony_ci 3668bf215546Sopenharmony_ci const struct VkPipelineSampleLocationsStateCreateInfoEXT *sample_locations = 3669bf215546Sopenharmony_ci vk_find_struct_const(msaa_info->pNext, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT); 3670bf215546Sopenharmony_ci const VkSampleLocationsInfoEXT *samp_loc = NULL; 3671bf215546Sopenharmony_ci 3672bf215546Sopenharmony_ci if (sample_locations && sample_locations->sampleLocationsEnable) 3673bf215546Sopenharmony_ci samp_loc = &sample_locations->sampleLocationsInfo; 3674bf215546Sopenharmony_ci 3675bf215546Sopenharmony_ci if (tu_pipeline_static_state(pipeline, &cs, TU_DYNAMIC_STATE_SAMPLE_LOCATIONS, 3676bf215546Sopenharmony_ci samp_loc ? 
9 : 6)) { 3677bf215546Sopenharmony_ci tu6_emit_sample_locations(&cs, samp_loc); 3678bf215546Sopenharmony_ci } 3679bf215546Sopenharmony_ci} 3680bf215546Sopenharmony_ci 3681bf215546Sopenharmony_cistatic void 3682bf215546Sopenharmony_citu_pipeline_builder_parse_rasterization_order( 3683bf215546Sopenharmony_ci struct tu_pipeline_builder *builder, struct tu_pipeline *pipeline) 3684bf215546Sopenharmony_ci{ 3685bf215546Sopenharmony_ci if (builder->rasterizer_discard) 3686bf215546Sopenharmony_ci return; 3687bf215546Sopenharmony_ci 3688bf215546Sopenharmony_ci pipeline->subpass_feedback_loop_ds = builder->subpass_feedback_loop_ds; 3689bf215546Sopenharmony_ci 3690bf215546Sopenharmony_ci const VkPipelineColorBlendStateCreateInfo *blend_info = 3691bf215546Sopenharmony_ci builder->create_info->pColorBlendState; 3692bf215546Sopenharmony_ci 3693bf215546Sopenharmony_ci const VkPipelineDepthStencilStateCreateInfo *ds_info = 3694bf215546Sopenharmony_ci builder->create_info->pDepthStencilState; 3695bf215546Sopenharmony_ci 3696bf215546Sopenharmony_ci if (builder->use_color_attachments) { 3697bf215546Sopenharmony_ci pipeline->raster_order_attachment_access = 3698bf215546Sopenharmony_ci blend_info->flags & 3699bf215546Sopenharmony_ci VK_PIPELINE_COLOR_BLEND_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_BIT_ARM; 3700bf215546Sopenharmony_ci } 3701bf215546Sopenharmony_ci 3702bf215546Sopenharmony_ci if (builder->depth_attachment_format != VK_FORMAT_UNDEFINED) { 3703bf215546Sopenharmony_ci pipeline->raster_order_attachment_access |= 3704bf215546Sopenharmony_ci ds_info->flags & 3705bf215546Sopenharmony_ci (VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM | 3706bf215546Sopenharmony_ci VK_PIPELINE_DEPTH_STENCIL_STATE_CREATE_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM); 3707bf215546Sopenharmony_ci } 3708bf215546Sopenharmony_ci 3709bf215546Sopenharmony_ci if (unlikely(builder->device->physical_device->instance->debug_flags & 
TU_DEBUG_RAST_ORDER)) 3710bf215546Sopenharmony_ci pipeline->raster_order_attachment_access = true; 3711bf215546Sopenharmony_ci 3712bf215546Sopenharmony_ci /* VK_EXT_blend_operation_advanced would also require ordered access 3713bf215546Sopenharmony_ci * when implemented in the future. 3714bf215546Sopenharmony_ci */ 3715bf215546Sopenharmony_ci 3716bf215546Sopenharmony_ci uint32_t sysmem_prim_mode = NO_FLUSH; 3717bf215546Sopenharmony_ci uint32_t gmem_prim_mode = NO_FLUSH; 3718bf215546Sopenharmony_ci 3719bf215546Sopenharmony_ci if (pipeline->raster_order_attachment_access) { 3720bf215546Sopenharmony_ci /* VK_ARM_rasterization_order_attachment_access: 3721bf215546Sopenharmony_ci * 3722bf215546Sopenharmony_ci * This extension allow access to framebuffer attachments when used as 3723bf215546Sopenharmony_ci * both input and color attachments from one fragment to the next, 3724bf215546Sopenharmony_ci * in rasterization order, without explicit synchronization. 3725bf215546Sopenharmony_ci */ 3726bf215546Sopenharmony_ci sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE; 3727bf215546Sopenharmony_ci gmem_prim_mode = FLUSH_PER_OVERLAP; 3728bf215546Sopenharmony_ci } else { 3729bf215546Sopenharmony_ci /* If there is a feedback loop, then the shader can read the previous value 3730bf215546Sopenharmony_ci * of a pixel being written out. It can also write some components and then 3731bf215546Sopenharmony_ci * read different components without a barrier in between. This is a 3732bf215546Sopenharmony_ci * problem in sysmem mode with UBWC, because the main buffer and flags 3733bf215546Sopenharmony_ci * buffer can get out-of-sync if only one is flushed. We fix this by 3734bf215546Sopenharmony_ci * setting the SINGLE_PRIM_MODE field to the same value that the blob does 3735bf215546Sopenharmony_ci * for advanced_blend in sysmem mode if a feedback loop is detected. 
3736bf215546Sopenharmony_ci */ 3737bf215546Sopenharmony_ci if (builder->subpass_feedback_loop_color || 3738bf215546Sopenharmony_ci builder->subpass_feedback_loop_ds) { 3739bf215546Sopenharmony_ci sysmem_prim_mode = FLUSH_PER_OVERLAP_AND_OVERWRITE; 3740bf215546Sopenharmony_ci } 3741bf215546Sopenharmony_ci } 3742bf215546Sopenharmony_ci 3743bf215546Sopenharmony_ci struct tu_cs cs; 3744bf215546Sopenharmony_ci 3745bf215546Sopenharmony_ci pipeline->prim_order_state_gmem = tu_cs_draw_state(&pipeline->cs, &cs, 2); 3746bf215546Sopenharmony_ci tu_cs_emit_write_reg(&cs, REG_A6XX_GRAS_SC_CNTL, 3747bf215546Sopenharmony_ci A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) | 3748bf215546Sopenharmony_ci A6XX_GRAS_SC_CNTL_SINGLE_PRIM_MODE(gmem_prim_mode)); 3749bf215546Sopenharmony_ci 3750bf215546Sopenharmony_ci pipeline->prim_order_state_sysmem = tu_cs_draw_state(&pipeline->cs, &cs, 2); 3751bf215546Sopenharmony_ci tu_cs_emit_write_reg(&cs, REG_A6XX_GRAS_SC_CNTL, 3752bf215546Sopenharmony_ci A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2) | 3753bf215546Sopenharmony_ci A6XX_GRAS_SC_CNTL_SINGLE_PRIM_MODE(sysmem_prim_mode)); 3754bf215546Sopenharmony_ci} 3755bf215546Sopenharmony_ci 3756bf215546Sopenharmony_cistatic void 3757bf215546Sopenharmony_citu_pipeline_finish(struct tu_pipeline *pipeline, 3758bf215546Sopenharmony_ci struct tu_device *dev, 3759bf215546Sopenharmony_ci const VkAllocationCallbacks *alloc) 3760bf215546Sopenharmony_ci{ 3761bf215546Sopenharmony_ci tu_cs_finish(&pipeline->cs); 3762bf215546Sopenharmony_ci pthread_mutex_lock(&dev->pipeline_mutex); 3763bf215546Sopenharmony_ci tu_suballoc_bo_free(&dev->pipeline_suballoc, &pipeline->bo); 3764bf215546Sopenharmony_ci pthread_mutex_unlock(&dev->pipeline_mutex); 3765bf215546Sopenharmony_ci 3766bf215546Sopenharmony_ci if (pipeline->pvtmem_bo) 3767bf215546Sopenharmony_ci tu_bo_finish(dev, pipeline->pvtmem_bo); 3768bf215546Sopenharmony_ci 3769bf215546Sopenharmony_ci ralloc_free(pipeline->executables_mem_ctx); 3770bf215546Sopenharmony_ci} 
3771bf215546Sopenharmony_ci 3772bf215546Sopenharmony_cistatic VkResult 3773bf215546Sopenharmony_citu_pipeline_builder_build(struct tu_pipeline_builder *builder, 3774bf215546Sopenharmony_ci struct tu_pipeline **pipeline) 3775bf215546Sopenharmony_ci{ 3776bf215546Sopenharmony_ci VkResult result; 3777bf215546Sopenharmony_ci 3778bf215546Sopenharmony_ci *pipeline = vk_object_zalloc(&builder->device->vk, builder->alloc, 3779bf215546Sopenharmony_ci sizeof(**pipeline), VK_OBJECT_TYPE_PIPELINE); 3780bf215546Sopenharmony_ci if (!*pipeline) 3781bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 3782bf215546Sopenharmony_ci 3783bf215546Sopenharmony_ci (*pipeline)->executables_mem_ctx = ralloc_context(NULL); 3784bf215546Sopenharmony_ci util_dynarray_init(&(*pipeline)->executables, (*pipeline)->executables_mem_ctx); 3785bf215546Sopenharmony_ci 3786bf215546Sopenharmony_ci /* compile and upload shaders */ 3787bf215546Sopenharmony_ci result = tu_pipeline_builder_compile_shaders(builder, *pipeline); 3788bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 3789bf215546Sopenharmony_ci vk_object_free(&builder->device->vk, builder->alloc, *pipeline); 3790bf215546Sopenharmony_ci return result; 3791bf215546Sopenharmony_ci } 3792bf215546Sopenharmony_ci 3793bf215546Sopenharmony_ci result = tu_pipeline_allocate_cs(builder->device, *pipeline, 3794bf215546Sopenharmony_ci builder->layout, builder, NULL); 3795bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 3796bf215546Sopenharmony_ci vk_object_free(&builder->device->vk, builder->alloc, *pipeline); 3797bf215546Sopenharmony_ci return result; 3798bf215546Sopenharmony_ci } 3799bf215546Sopenharmony_ci 3800bf215546Sopenharmony_ci for (uint32_t i = 0; i < ARRAY_SIZE(builder->shader_iova); i++) 3801bf215546Sopenharmony_ci builder->shader_iova[i] = 3802bf215546Sopenharmony_ci tu_upload_variant(*pipeline, builder->shaders->variants[i]); 3803bf215546Sopenharmony_ci 3804bf215546Sopenharmony_ci builder->binning_vs_iova = 3805bf215546Sopenharmony_ci 
tu_upload_variant(*pipeline, builder->binning_variant); 3806bf215546Sopenharmony_ci 3807bf215546Sopenharmony_ci /* Setup private memory. Note that because we're sharing the same private 3808bf215546Sopenharmony_ci * memory for all stages, all stages must use the same config, or else 3809bf215546Sopenharmony_ci * fibers from one stage might overwrite fibers in another. 3810bf215546Sopenharmony_ci */ 3811bf215546Sopenharmony_ci 3812bf215546Sopenharmony_ci uint32_t pvtmem_size = 0; 3813bf215546Sopenharmony_ci bool per_wave = true; 3814bf215546Sopenharmony_ci for (uint32_t i = 0; i < ARRAY_SIZE(builder->shaders->variants); i++) { 3815bf215546Sopenharmony_ci if (builder->shaders->variants[i]) { 3816bf215546Sopenharmony_ci pvtmem_size = MAX2(pvtmem_size, builder->shaders->variants[i]->pvtmem_size); 3817bf215546Sopenharmony_ci if (!builder->shaders->variants[i]->pvtmem_per_wave) 3818bf215546Sopenharmony_ci per_wave = false; 3819bf215546Sopenharmony_ci } 3820bf215546Sopenharmony_ci } 3821bf215546Sopenharmony_ci 3822bf215546Sopenharmony_ci if (builder->binning_variant) { 3823bf215546Sopenharmony_ci pvtmem_size = MAX2(pvtmem_size, builder->binning_variant->pvtmem_size); 3824bf215546Sopenharmony_ci if (!builder->binning_variant->pvtmem_per_wave) 3825bf215546Sopenharmony_ci per_wave = false; 3826bf215546Sopenharmony_ci } 3827bf215546Sopenharmony_ci 3828bf215546Sopenharmony_ci result = tu_setup_pvtmem(builder->device, *pipeline, &builder->pvtmem, 3829bf215546Sopenharmony_ci pvtmem_size, per_wave); 3830bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 3831bf215546Sopenharmony_ci vk_object_free(&builder->device->vk, builder->alloc, *pipeline); 3832bf215546Sopenharmony_ci return result; 3833bf215546Sopenharmony_ci } 3834bf215546Sopenharmony_ci 3835bf215546Sopenharmony_ci tu_pipeline_builder_parse_dynamic(builder, *pipeline); 3836bf215546Sopenharmony_ci tu_pipeline_builder_parse_shader_stages(builder, *pipeline); 3837bf215546Sopenharmony_ci 
tu_pipeline_builder_parse_vertex_input(builder, *pipeline); 3838bf215546Sopenharmony_ci tu_pipeline_builder_parse_input_assembly(builder, *pipeline); 3839bf215546Sopenharmony_ci tu_pipeline_builder_parse_tessellation(builder, *pipeline); 3840bf215546Sopenharmony_ci tu_pipeline_builder_parse_viewport(builder, *pipeline); 3841bf215546Sopenharmony_ci tu_pipeline_builder_parse_rasterization(builder, *pipeline); 3842bf215546Sopenharmony_ci tu_pipeline_builder_parse_depth_stencil(builder, *pipeline); 3843bf215546Sopenharmony_ci tu_pipeline_builder_parse_multisample_and_color_blend(builder, *pipeline); 3844bf215546Sopenharmony_ci tu_pipeline_builder_parse_rasterization_order(builder, *pipeline); 3845bf215546Sopenharmony_ci tu6_emit_load_state(*pipeline, builder->layout, false); 3846bf215546Sopenharmony_ci 3847bf215546Sopenharmony_ci return VK_SUCCESS; 3848bf215546Sopenharmony_ci} 3849bf215546Sopenharmony_ci 3850bf215546Sopenharmony_cistatic void 3851bf215546Sopenharmony_citu_pipeline_builder_finish(struct tu_pipeline_builder *builder) 3852bf215546Sopenharmony_ci{ 3853bf215546Sopenharmony_ci if (builder->shaders) 3854bf215546Sopenharmony_ci vk_pipeline_cache_object_unref(&builder->shaders->base); 3855bf215546Sopenharmony_ci ralloc_free(builder->mem_ctx); 3856bf215546Sopenharmony_ci} 3857bf215546Sopenharmony_ci 3858bf215546Sopenharmony_cistatic void 3859bf215546Sopenharmony_citu_pipeline_builder_init_graphics( 3860bf215546Sopenharmony_ci struct tu_pipeline_builder *builder, 3861bf215546Sopenharmony_ci struct tu_device *dev, 3862bf215546Sopenharmony_ci struct vk_pipeline_cache *cache, 3863bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *create_info, 3864bf215546Sopenharmony_ci const VkAllocationCallbacks *alloc) 3865bf215546Sopenharmony_ci{ 3866bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_pipeline_layout, layout, create_info->layout); 3867bf215546Sopenharmony_ci 3868bf215546Sopenharmony_ci *builder = (struct tu_pipeline_builder) { 3869bf215546Sopenharmony_ci .device 
= dev, 3870bf215546Sopenharmony_ci .mem_ctx = ralloc_context(NULL), 3871bf215546Sopenharmony_ci .cache = cache, 3872bf215546Sopenharmony_ci .create_info = create_info, 3873bf215546Sopenharmony_ci .alloc = alloc, 3874bf215546Sopenharmony_ci .layout = layout, 3875bf215546Sopenharmony_ci }; 3876bf215546Sopenharmony_ci 3877bf215546Sopenharmony_ci bool rasterizer_discard_dynamic = false; 3878bf215546Sopenharmony_ci if (create_info->pDynamicState) { 3879bf215546Sopenharmony_ci for (uint32_t i = 0; i < create_info->pDynamicState->dynamicStateCount; i++) { 3880bf215546Sopenharmony_ci if (create_info->pDynamicState->pDynamicStates[i] == 3881bf215546Sopenharmony_ci VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE) { 3882bf215546Sopenharmony_ci rasterizer_discard_dynamic = true; 3883bf215546Sopenharmony_ci break; 3884bf215546Sopenharmony_ci } 3885bf215546Sopenharmony_ci } 3886bf215546Sopenharmony_ci } 3887bf215546Sopenharmony_ci 3888bf215546Sopenharmony_ci builder->rasterizer_discard = 3889bf215546Sopenharmony_ci builder->create_info->pRasterizationState->rasterizerDiscardEnable && 3890bf215546Sopenharmony_ci !rasterizer_discard_dynamic; 3891bf215546Sopenharmony_ci 3892bf215546Sopenharmony_ci const VkPipelineRenderingCreateInfo *rendering_info = 3893bf215546Sopenharmony_ci vk_find_struct_const(create_info->pNext, PIPELINE_RENDERING_CREATE_INFO); 3894bf215546Sopenharmony_ci 3895bf215546Sopenharmony_ci if (unlikely(dev->instance->debug_flags & TU_DEBUG_DYNAMIC) && !rendering_info) 3896bf215546Sopenharmony_ci rendering_info = vk_get_pipeline_rendering_create_info(create_info); 3897bf215546Sopenharmony_ci 3898bf215546Sopenharmony_ci if (rendering_info) { 3899bf215546Sopenharmony_ci builder->subpass_raster_order_attachment_access = false; 3900bf215546Sopenharmony_ci builder->subpass_feedback_loop_ds = false; 3901bf215546Sopenharmony_ci builder->subpass_feedback_loop_color = false; 3902bf215546Sopenharmony_ci 3903bf215546Sopenharmony_ci builder->multiview_mask = rendering_info->viewMask; 
3904bf215546Sopenharmony_ci 3905bf215546Sopenharmony_ci /* We don't know with dynamic rendering whether the pipeline will be 3906bf215546Sopenharmony_ci * used in a render pass with none of attachments enabled, so we have to 3907bf215546Sopenharmony_ci * dynamically emit MSAA state. 3908bf215546Sopenharmony_ci * 3909bf215546Sopenharmony_ci * TODO: Move MSAA state to a separate draw state and emit it 3910bf215546Sopenharmony_ci * dynamically only when the sample count is different from the 3911bf215546Sopenharmony_ci * subpass's sample count. 3912bf215546Sopenharmony_ci */ 3913bf215546Sopenharmony_ci builder->emit_msaa_state = !builder->rasterizer_discard; 3914bf215546Sopenharmony_ci 3915bf215546Sopenharmony_ci const VkRenderingSelfDependencyInfoMESA *self_dependency = 3916bf215546Sopenharmony_ci vk_find_struct_const(rendering_info->pNext, RENDERING_SELF_DEPENDENCY_INFO_MESA); 3917bf215546Sopenharmony_ci 3918bf215546Sopenharmony_ci if (self_dependency) { 3919bf215546Sopenharmony_ci builder->subpass_feedback_loop_ds = 3920bf215546Sopenharmony_ci self_dependency->depthSelfDependency || 3921bf215546Sopenharmony_ci self_dependency->stencilSelfDependency; 3922bf215546Sopenharmony_ci builder->subpass_feedback_loop_color = 3923bf215546Sopenharmony_ci self_dependency->colorSelfDependencies; 3924bf215546Sopenharmony_ci } 3925bf215546Sopenharmony_ci 3926bf215546Sopenharmony_ci if (!builder->rasterizer_discard) { 3927bf215546Sopenharmony_ci builder->depth_attachment_format = 3928bf215546Sopenharmony_ci rendering_info->depthAttachmentFormat == VK_FORMAT_UNDEFINED ? 
3929bf215546Sopenharmony_ci rendering_info->stencilAttachmentFormat : 3930bf215546Sopenharmony_ci rendering_info->depthAttachmentFormat; 3931bf215546Sopenharmony_ci 3932bf215546Sopenharmony_ci builder->color_attachment_count = 3933bf215546Sopenharmony_ci rendering_info->colorAttachmentCount; 3934bf215546Sopenharmony_ci 3935bf215546Sopenharmony_ci for (unsigned i = 0; i < rendering_info->colorAttachmentCount; i++) { 3936bf215546Sopenharmony_ci builder->color_attachment_formats[i] = 3937bf215546Sopenharmony_ci rendering_info->pColorAttachmentFormats[i]; 3938bf215546Sopenharmony_ci if (builder->color_attachment_formats[i] != VK_FORMAT_UNDEFINED) { 3939bf215546Sopenharmony_ci builder->use_color_attachments = true; 3940bf215546Sopenharmony_ci builder->render_components |= 0xf << (i * 4); 3941bf215546Sopenharmony_ci } 3942bf215546Sopenharmony_ci } 3943bf215546Sopenharmony_ci } 3944bf215546Sopenharmony_ci } else { 3945bf215546Sopenharmony_ci const struct tu_render_pass *pass = 3946bf215546Sopenharmony_ci tu_render_pass_from_handle(create_info->renderPass); 3947bf215546Sopenharmony_ci const struct tu_subpass *subpass = 3948bf215546Sopenharmony_ci &pass->subpasses[create_info->subpass]; 3949bf215546Sopenharmony_ci 3950bf215546Sopenharmony_ci builder->subpass_raster_order_attachment_access = 3951bf215546Sopenharmony_ci subpass->raster_order_attachment_access; 3952bf215546Sopenharmony_ci builder->subpass_feedback_loop_color = subpass->feedback_loop_color; 3953bf215546Sopenharmony_ci builder->subpass_feedback_loop_ds = subpass->feedback_loop_ds; 3954bf215546Sopenharmony_ci 3955bf215546Sopenharmony_ci builder->multiview_mask = subpass->multiview_mask; 3956bf215546Sopenharmony_ci 3957bf215546Sopenharmony_ci /* variableMultisampleRate support */ 3958bf215546Sopenharmony_ci builder->emit_msaa_state = (subpass->samples == 0) && !builder->rasterizer_discard; 3959bf215546Sopenharmony_ci 3960bf215546Sopenharmony_ci if (!builder->rasterizer_discard) { 3961bf215546Sopenharmony_ci const 
uint32_t a = subpass->depth_stencil_attachment.attachment; 3962bf215546Sopenharmony_ci builder->depth_attachment_format = (a != VK_ATTACHMENT_UNUSED) ? 3963bf215546Sopenharmony_ci pass->attachments[a].format : VK_FORMAT_UNDEFINED; 3964bf215546Sopenharmony_ci 3965bf215546Sopenharmony_ci assert(subpass->color_count == 0 || 3966bf215546Sopenharmony_ci !create_info->pColorBlendState || 3967bf215546Sopenharmony_ci subpass->color_count == create_info->pColorBlendState->attachmentCount); 3968bf215546Sopenharmony_ci builder->color_attachment_count = subpass->color_count; 3969bf215546Sopenharmony_ci for (uint32_t i = 0; i < subpass->color_count; i++) { 3970bf215546Sopenharmony_ci const uint32_t a = subpass->color_attachments[i].attachment; 3971bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 3972bf215546Sopenharmony_ci continue; 3973bf215546Sopenharmony_ci 3974bf215546Sopenharmony_ci builder->color_attachment_formats[i] = pass->attachments[a].format; 3975bf215546Sopenharmony_ci builder->use_color_attachments = true; 3976bf215546Sopenharmony_ci builder->render_components |= 0xf << (i * 4); 3977bf215546Sopenharmony_ci } 3978bf215546Sopenharmony_ci } 3979bf215546Sopenharmony_ci } 3980bf215546Sopenharmony_ci 3981bf215546Sopenharmony_ci 3982bf215546Sopenharmony_ci if (builder->rasterizer_discard) { 3983bf215546Sopenharmony_ci builder->samples = VK_SAMPLE_COUNT_1_BIT; 3984bf215546Sopenharmony_ci } else { 3985bf215546Sopenharmony_ci builder->samples = create_info->pMultisampleState->rasterizationSamples; 3986bf215546Sopenharmony_ci builder->alpha_to_coverage = create_info->pMultisampleState->alphaToCoverageEnable; 3987bf215546Sopenharmony_ci 3988bf215546Sopenharmony_ci if (tu_blend_state_is_dual_src(create_info->pColorBlendState)) { 3989bf215546Sopenharmony_ci builder->color_attachment_count++; 3990bf215546Sopenharmony_ci builder->use_dual_src_blend = true; 3991bf215546Sopenharmony_ci /* dual source blending has an extra fs output in the 2nd slot */ 
3992bf215546Sopenharmony_ci if (builder->color_attachment_formats[0] != VK_FORMAT_UNDEFINED) 3993bf215546Sopenharmony_ci builder->render_components |= 0xf << 4; 3994bf215546Sopenharmony_ci } 3995bf215546Sopenharmony_ci } 3996bf215546Sopenharmony_ci} 3997bf215546Sopenharmony_ci 3998bf215546Sopenharmony_cistatic VkResult 3999bf215546Sopenharmony_citu_graphics_pipeline_create(VkDevice device, 4000bf215546Sopenharmony_ci VkPipelineCache pipelineCache, 4001bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfo, 4002bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 4003bf215546Sopenharmony_ci VkPipeline *pPipeline) 4004bf215546Sopenharmony_ci{ 4005bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_device, dev, device); 4006bf215546Sopenharmony_ci TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); 4007bf215546Sopenharmony_ci 4008bf215546Sopenharmony_ci cache = cache ? cache : dev->mem_cache; 4009bf215546Sopenharmony_ci 4010bf215546Sopenharmony_ci struct tu_pipeline_builder builder; 4011bf215546Sopenharmony_ci tu_pipeline_builder_init_graphics(&builder, dev, cache, 4012bf215546Sopenharmony_ci pCreateInfo, pAllocator); 4013bf215546Sopenharmony_ci 4014bf215546Sopenharmony_ci struct tu_pipeline *pipeline = NULL; 4015bf215546Sopenharmony_ci VkResult result = tu_pipeline_builder_build(&builder, &pipeline); 4016bf215546Sopenharmony_ci tu_pipeline_builder_finish(&builder); 4017bf215546Sopenharmony_ci 4018bf215546Sopenharmony_ci if (result == VK_SUCCESS) 4019bf215546Sopenharmony_ci *pPipeline = tu_pipeline_to_handle(pipeline); 4020bf215546Sopenharmony_ci else 4021bf215546Sopenharmony_ci *pPipeline = VK_NULL_HANDLE; 4022bf215546Sopenharmony_ci 4023bf215546Sopenharmony_ci return result; 4024bf215546Sopenharmony_ci} 4025bf215546Sopenharmony_ci 4026bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 4027bf215546Sopenharmony_citu_CreateGraphicsPipelines(VkDevice device, 4028bf215546Sopenharmony_ci VkPipelineCache pipelineCache, 4029bf215546Sopenharmony_ci 
uint32_t count, 4030bf215546Sopenharmony_ci const VkGraphicsPipelineCreateInfo *pCreateInfos, 4031bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 4032bf215546Sopenharmony_ci VkPipeline *pPipelines) 4033bf215546Sopenharmony_ci{ 4034bf215546Sopenharmony_ci VkResult final_result = VK_SUCCESS; 4035bf215546Sopenharmony_ci uint32_t i = 0; 4036bf215546Sopenharmony_ci 4037bf215546Sopenharmony_ci for (; i < count; i++) { 4038bf215546Sopenharmony_ci VkResult result = tu_graphics_pipeline_create(device, pipelineCache, 4039bf215546Sopenharmony_ci &pCreateInfos[i], pAllocator, 4040bf215546Sopenharmony_ci &pPipelines[i]); 4041bf215546Sopenharmony_ci 4042bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 4043bf215546Sopenharmony_ci final_result = result; 4044bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 4045bf215546Sopenharmony_ci 4046bf215546Sopenharmony_ci if (pCreateInfos[i].flags & 4047bf215546Sopenharmony_ci VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 4048bf215546Sopenharmony_ci break; 4049bf215546Sopenharmony_ci } 4050bf215546Sopenharmony_ci } 4051bf215546Sopenharmony_ci 4052bf215546Sopenharmony_ci for (; i < count; i++) 4053bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 4054bf215546Sopenharmony_ci 4055bf215546Sopenharmony_ci return final_result; 4056bf215546Sopenharmony_ci} 4057bf215546Sopenharmony_ci 4058bf215546Sopenharmony_cistatic VkResult 4059bf215546Sopenharmony_citu_compute_pipeline_create(VkDevice device, 4060bf215546Sopenharmony_ci VkPipelineCache pipelineCache, 4061bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *pCreateInfo, 4062bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 4063bf215546Sopenharmony_ci VkPipeline *pPipeline) 4064bf215546Sopenharmony_ci{ 4065bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_device, dev, device); 4066bf215546Sopenharmony_ci TU_FROM_HANDLE(vk_pipeline_cache, cache, pipelineCache); 4067bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_pipeline_layout, layout, pCreateInfo->layout); 
4068bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *stage_info = &pCreateInfo->stage; 4069bf215546Sopenharmony_ci VkResult result; 4070bf215546Sopenharmony_ci 4071bf215546Sopenharmony_ci cache = cache ? cache : dev->mem_cache; 4072bf215546Sopenharmony_ci 4073bf215546Sopenharmony_ci struct tu_pipeline *pipeline; 4074bf215546Sopenharmony_ci 4075bf215546Sopenharmony_ci *pPipeline = VK_NULL_HANDLE; 4076bf215546Sopenharmony_ci 4077bf215546Sopenharmony_ci VkPipelineCreationFeedback pipeline_feedback = { 4078bf215546Sopenharmony_ci .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT, 4079bf215546Sopenharmony_ci }; 4080bf215546Sopenharmony_ci 4081bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *creation_feedback = 4082bf215546Sopenharmony_ci vk_find_struct_const(pCreateInfo->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO); 4083bf215546Sopenharmony_ci 4084bf215546Sopenharmony_ci int64_t pipeline_start = os_time_get_nano(); 4085bf215546Sopenharmony_ci 4086bf215546Sopenharmony_ci pipeline = vk_object_zalloc(&dev->vk, pAllocator, sizeof(*pipeline), 4087bf215546Sopenharmony_ci VK_OBJECT_TYPE_PIPELINE); 4088bf215546Sopenharmony_ci if (!pipeline) 4089bf215546Sopenharmony_ci return VK_ERROR_OUT_OF_HOST_MEMORY; 4090bf215546Sopenharmony_ci 4091bf215546Sopenharmony_ci pipeline->executables_mem_ctx = ralloc_context(NULL); 4092bf215546Sopenharmony_ci util_dynarray_init(&pipeline->executables, pipeline->executables_mem_ctx); 4093bf215546Sopenharmony_ci 4094bf215546Sopenharmony_ci struct tu_shader_key key = { }; 4095bf215546Sopenharmony_ci tu_shader_key_init(&key, stage_info, dev); 4096bf215546Sopenharmony_ci 4097bf215546Sopenharmony_ci void *pipeline_mem_ctx = ralloc_context(NULL); 4098bf215546Sopenharmony_ci 4099bf215546Sopenharmony_ci unsigned char pipeline_sha1[20]; 4100bf215546Sopenharmony_ci tu_hash_compute(pipeline_sha1, stage_info, layout, &key, dev->compiler); 4101bf215546Sopenharmony_ci 4102bf215546Sopenharmony_ci struct tu_compiled_shaders 
*compiled = NULL; 4103bf215546Sopenharmony_ci 4104bf215546Sopenharmony_ci const bool executable_info = pCreateInfo->flags & 4105bf215546Sopenharmony_ci VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR; 4106bf215546Sopenharmony_ci 4107bf215546Sopenharmony_ci bool application_cache_hit = false; 4108bf215546Sopenharmony_ci 4109bf215546Sopenharmony_ci if (!executable_info) { 4110bf215546Sopenharmony_ci compiled = 4111bf215546Sopenharmony_ci tu_pipeline_cache_lookup(cache, pipeline_sha1, sizeof(pipeline_sha1), 4112bf215546Sopenharmony_ci &application_cache_hit); 4113bf215546Sopenharmony_ci } 4114bf215546Sopenharmony_ci 4115bf215546Sopenharmony_ci if (application_cache_hit && cache != dev->mem_cache) { 4116bf215546Sopenharmony_ci pipeline_feedback.flags |= 4117bf215546Sopenharmony_ci VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT; 4118bf215546Sopenharmony_ci } 4119bf215546Sopenharmony_ci 4120bf215546Sopenharmony_ci if (tu6_shared_constants_enable(layout, dev->compiler)) { 4121bf215546Sopenharmony_ci pipeline->shared_consts = (struct tu_push_constant_range) { 4122bf215546Sopenharmony_ci .lo = 0, 4123bf215546Sopenharmony_ci .dwords = layout->push_constant_size / 4, 4124bf215546Sopenharmony_ci }; 4125bf215546Sopenharmony_ci } 4126bf215546Sopenharmony_ci 4127bf215546Sopenharmony_ci char *nir_initial_disasm = NULL; 4128bf215546Sopenharmony_ci 4129bf215546Sopenharmony_ci if (!compiled) { 4130bf215546Sopenharmony_ci if (pCreateInfo->flags & 4131bf215546Sopenharmony_ci VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT) { 4132bf215546Sopenharmony_ci result = VK_PIPELINE_COMPILE_REQUIRED; 4133bf215546Sopenharmony_ci goto fail; 4134bf215546Sopenharmony_ci } 4135bf215546Sopenharmony_ci 4136bf215546Sopenharmony_ci struct ir3_shader_key ir3_key = {}; 4137bf215546Sopenharmony_ci 4138bf215546Sopenharmony_ci nir_shader *nir = tu_spirv_to_nir(dev, pipeline_mem_ctx, stage_info, 4139bf215546Sopenharmony_ci MESA_SHADER_COMPUTE); 4140bf215546Sopenharmony_ci 
4141bf215546Sopenharmony_ci nir_initial_disasm = executable_info ? 4142bf215546Sopenharmony_ci nir_shader_as_str(nir, pipeline->executables_mem_ctx) : NULL; 4143bf215546Sopenharmony_ci 4144bf215546Sopenharmony_ci struct tu_shader *shader = 4145bf215546Sopenharmony_ci tu_shader_create(dev, nir, &key, layout, pAllocator); 4146bf215546Sopenharmony_ci if (!shader) { 4147bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 4148bf215546Sopenharmony_ci goto fail; 4149bf215546Sopenharmony_ci } 4150bf215546Sopenharmony_ci 4151bf215546Sopenharmony_ci compiled = tu_shaders_init(dev, &pipeline_sha1, sizeof(pipeline_sha1)); 4152bf215546Sopenharmony_ci if (!compiled) { 4153bf215546Sopenharmony_ci tu_shader_destroy(dev, shader, pAllocator); 4154bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 4155bf215546Sopenharmony_ci goto fail; 4156bf215546Sopenharmony_ci } 4157bf215546Sopenharmony_ci 4158bf215546Sopenharmony_ci compiled->active_desc_sets = shader->active_desc_sets; 4159bf215546Sopenharmony_ci compiled->push_consts[MESA_SHADER_COMPUTE] = shader->push_consts; 4160bf215546Sopenharmony_ci 4161bf215546Sopenharmony_ci struct ir3_shader_variant *v = 4162bf215546Sopenharmony_ci ir3_shader_create_variant(shader->ir3_shader, &ir3_key, executable_info); 4163bf215546Sopenharmony_ci 4164bf215546Sopenharmony_ci tu_shader_destroy(dev, shader, pAllocator); 4165bf215546Sopenharmony_ci 4166bf215546Sopenharmony_ci if (!v) { 4167bf215546Sopenharmony_ci result = VK_ERROR_OUT_OF_HOST_MEMORY; 4168bf215546Sopenharmony_ci goto fail; 4169bf215546Sopenharmony_ci } 4170bf215546Sopenharmony_ci 4171bf215546Sopenharmony_ci compiled->variants[MESA_SHADER_COMPUTE] = v; 4172bf215546Sopenharmony_ci 4173bf215546Sopenharmony_ci compiled = tu_pipeline_cache_insert(cache, compiled); 4174bf215546Sopenharmony_ci } 4175bf215546Sopenharmony_ci 4176bf215546Sopenharmony_ci pipeline_feedback.duration = os_time_get_nano() - pipeline_start; 4177bf215546Sopenharmony_ci 4178bf215546Sopenharmony_ci if 
(creation_feedback) { 4179bf215546Sopenharmony_ci *creation_feedback->pPipelineCreationFeedback = pipeline_feedback; 4180bf215546Sopenharmony_ci assert(creation_feedback->pipelineStageCreationFeedbackCount == 1); 4181bf215546Sopenharmony_ci creation_feedback->pPipelineStageCreationFeedbacks[0] = pipeline_feedback; 4182bf215546Sopenharmony_ci } 4183bf215546Sopenharmony_ci 4184bf215546Sopenharmony_ci pipeline->active_desc_sets = compiled->active_desc_sets; 4185bf215546Sopenharmony_ci 4186bf215546Sopenharmony_ci struct ir3_shader_variant *v = compiled->variants[MESA_SHADER_COMPUTE]; 4187bf215546Sopenharmony_ci 4188bf215546Sopenharmony_ci tu_pipeline_set_linkage(&pipeline->program.link[MESA_SHADER_COMPUTE], 4189bf215546Sopenharmony_ci &compiled->push_consts[MESA_SHADER_COMPUTE], v); 4190bf215546Sopenharmony_ci 4191bf215546Sopenharmony_ci result = tu_pipeline_allocate_cs(dev, pipeline, layout, NULL, v); 4192bf215546Sopenharmony_ci if (result != VK_SUCCESS) 4193bf215546Sopenharmony_ci goto fail; 4194bf215546Sopenharmony_ci 4195bf215546Sopenharmony_ci uint64_t shader_iova = tu_upload_variant(pipeline, v); 4196bf215546Sopenharmony_ci 4197bf215546Sopenharmony_ci struct tu_pvtmem_config pvtmem; 4198bf215546Sopenharmony_ci tu_setup_pvtmem(dev, pipeline, &pvtmem, v->pvtmem_size, v->pvtmem_per_wave); 4199bf215546Sopenharmony_ci 4200bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) 4201bf215546Sopenharmony_ci pipeline->compute.local_size[i] = v->local_size[i]; 4202bf215546Sopenharmony_ci 4203bf215546Sopenharmony_ci pipeline->compute.subgroup_size = v->info.double_threadsize ? 
128 : 64; 4204bf215546Sopenharmony_ci 4205bf215546Sopenharmony_ci struct tu_cs prog_cs; 4206bf215546Sopenharmony_ci uint32_t additional_reserve_size = tu_xs_get_additional_cs_size_dwords(v); 4207bf215546Sopenharmony_ci tu_cs_begin_sub_stream(&pipeline->cs, 64 + additional_reserve_size, &prog_cs); 4208bf215546Sopenharmony_ci tu6_emit_cs_config(&prog_cs, v, &pvtmem, shader_iova); 4209bf215546Sopenharmony_ci pipeline->program.state = tu_cs_end_draw_state(&pipeline->cs, &prog_cs); 4210bf215546Sopenharmony_ci 4211bf215546Sopenharmony_ci tu6_emit_load_state(pipeline, layout, true); 4212bf215546Sopenharmony_ci 4213bf215546Sopenharmony_ci tu_append_executable(pipeline, v, nir_initial_disasm); 4214bf215546Sopenharmony_ci 4215bf215546Sopenharmony_ci vk_pipeline_cache_object_unref(&compiled->base); 4216bf215546Sopenharmony_ci ralloc_free(pipeline_mem_ctx); 4217bf215546Sopenharmony_ci 4218bf215546Sopenharmony_ci *pPipeline = tu_pipeline_to_handle(pipeline); 4219bf215546Sopenharmony_ci 4220bf215546Sopenharmony_ci return VK_SUCCESS; 4221bf215546Sopenharmony_ci 4222bf215546Sopenharmony_cifail: 4223bf215546Sopenharmony_ci if (compiled) 4224bf215546Sopenharmony_ci vk_pipeline_cache_object_unref(&compiled->base); 4225bf215546Sopenharmony_ci 4226bf215546Sopenharmony_ci ralloc_free(pipeline_mem_ctx); 4227bf215546Sopenharmony_ci 4228bf215546Sopenharmony_ci vk_object_free(&dev->vk, pAllocator, pipeline); 4229bf215546Sopenharmony_ci 4230bf215546Sopenharmony_ci return result; 4231bf215546Sopenharmony_ci} 4232bf215546Sopenharmony_ci 4233bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 4234bf215546Sopenharmony_citu_CreateComputePipelines(VkDevice device, 4235bf215546Sopenharmony_ci VkPipelineCache pipelineCache, 4236bf215546Sopenharmony_ci uint32_t count, 4237bf215546Sopenharmony_ci const VkComputePipelineCreateInfo *pCreateInfos, 4238bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator, 4239bf215546Sopenharmony_ci VkPipeline *pPipelines) 4240bf215546Sopenharmony_ci{ 
4241bf215546Sopenharmony_ci VkResult final_result = VK_SUCCESS; 4242bf215546Sopenharmony_ci uint32_t i = 0; 4243bf215546Sopenharmony_ci 4244bf215546Sopenharmony_ci for (; i < count; i++) { 4245bf215546Sopenharmony_ci VkResult result = tu_compute_pipeline_create(device, pipelineCache, 4246bf215546Sopenharmony_ci &pCreateInfos[i], 4247bf215546Sopenharmony_ci pAllocator, &pPipelines[i]); 4248bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 4249bf215546Sopenharmony_ci final_result = result; 4250bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 4251bf215546Sopenharmony_ci 4252bf215546Sopenharmony_ci if (pCreateInfos[i].flags & 4253bf215546Sopenharmony_ci VK_PIPELINE_CREATE_EARLY_RETURN_ON_FAILURE_BIT) 4254bf215546Sopenharmony_ci break; 4255bf215546Sopenharmony_ci } 4256bf215546Sopenharmony_ci } 4257bf215546Sopenharmony_ci 4258bf215546Sopenharmony_ci for (; i < count; i++) 4259bf215546Sopenharmony_ci pPipelines[i] = VK_NULL_HANDLE; 4260bf215546Sopenharmony_ci 4261bf215546Sopenharmony_ci return final_result; 4262bf215546Sopenharmony_ci} 4263bf215546Sopenharmony_ci 4264bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 4265bf215546Sopenharmony_citu_DestroyPipeline(VkDevice _device, 4266bf215546Sopenharmony_ci VkPipeline _pipeline, 4267bf215546Sopenharmony_ci const VkAllocationCallbacks *pAllocator) 4268bf215546Sopenharmony_ci{ 4269bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_device, dev, _device); 4270bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_pipeline, pipeline, _pipeline); 4271bf215546Sopenharmony_ci 4272bf215546Sopenharmony_ci if (!_pipeline) 4273bf215546Sopenharmony_ci return; 4274bf215546Sopenharmony_ci 4275bf215546Sopenharmony_ci tu_pipeline_finish(pipeline, dev, pAllocator); 4276bf215546Sopenharmony_ci vk_object_free(&dev->vk, pAllocator, pipeline); 4277bf215546Sopenharmony_ci} 4278bf215546Sopenharmony_ci 4279bf215546Sopenharmony_ci#define WRITE_STR(field, ...) 
({ \ 4280bf215546Sopenharmony_ci memset(field, 0, sizeof(field)); \ 4281bf215546Sopenharmony_ci UNUSED int _i = snprintf(field, sizeof(field), __VA_ARGS__); \ 4282bf215546Sopenharmony_ci assert(_i > 0 && _i < sizeof(field)); \ 4283bf215546Sopenharmony_ci}) 4284bf215546Sopenharmony_ci 4285bf215546Sopenharmony_cistatic const struct tu_pipeline_executable * 4286bf215546Sopenharmony_citu_pipeline_get_executable(struct tu_pipeline *pipeline, uint32_t index) 4287bf215546Sopenharmony_ci{ 4288bf215546Sopenharmony_ci assert(index < util_dynarray_num_elements(&pipeline->executables, 4289bf215546Sopenharmony_ci struct tu_pipeline_executable)); 4290bf215546Sopenharmony_ci return util_dynarray_element( 4291bf215546Sopenharmony_ci &pipeline->executables, struct tu_pipeline_executable, index); 4292bf215546Sopenharmony_ci} 4293bf215546Sopenharmony_ci 4294bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 4295bf215546Sopenharmony_citu_GetPipelineExecutablePropertiesKHR( 4296bf215546Sopenharmony_ci VkDevice _device, 4297bf215546Sopenharmony_ci const VkPipelineInfoKHR* pPipelineInfo, 4298bf215546Sopenharmony_ci uint32_t* pExecutableCount, 4299bf215546Sopenharmony_ci VkPipelineExecutablePropertiesKHR* pProperties) 4300bf215546Sopenharmony_ci{ 4301bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_device, dev, _device); 4302bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_pipeline, pipeline, pPipelineInfo->pipeline); 4303bf215546Sopenharmony_ci VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutablePropertiesKHR, out, 4304bf215546Sopenharmony_ci pProperties, pExecutableCount); 4305bf215546Sopenharmony_ci 4306bf215546Sopenharmony_ci util_dynarray_foreach (&pipeline->executables, struct tu_pipeline_executable, exe) { 4307bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutablePropertiesKHR, &out, props) { 4308bf215546Sopenharmony_ci gl_shader_stage stage = exe->stage; 4309bf215546Sopenharmony_ci props->stages = mesa_to_vk_shader_stage(stage); 4310bf215546Sopenharmony_ci 4311bf215546Sopenharmony_ci 
if (!exe->is_binning) 4312bf215546Sopenharmony_ci WRITE_STR(props->name, "%s", _mesa_shader_stage_to_abbrev(stage)); 4313bf215546Sopenharmony_ci else 4314bf215546Sopenharmony_ci WRITE_STR(props->name, "Binning VS"); 4315bf215546Sopenharmony_ci 4316bf215546Sopenharmony_ci WRITE_STR(props->description, "%s", _mesa_shader_stage_to_string(stage)); 4317bf215546Sopenharmony_ci 4318bf215546Sopenharmony_ci props->subgroupSize = 4319bf215546Sopenharmony_ci dev->compiler->threadsize_base * (exe->stats.double_threadsize ? 2 : 1); 4320bf215546Sopenharmony_ci } 4321bf215546Sopenharmony_ci } 4322bf215546Sopenharmony_ci 4323bf215546Sopenharmony_ci return vk_outarray_status(&out); 4324bf215546Sopenharmony_ci} 4325bf215546Sopenharmony_ci 4326bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL 4327bf215546Sopenharmony_citu_GetPipelineExecutableStatisticsKHR( 4328bf215546Sopenharmony_ci VkDevice _device, 4329bf215546Sopenharmony_ci const VkPipelineExecutableInfoKHR* pExecutableInfo, 4330bf215546Sopenharmony_ci uint32_t* pStatisticCount, 4331bf215546Sopenharmony_ci VkPipelineExecutableStatisticKHR* pStatistics) 4332bf215546Sopenharmony_ci{ 4333bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_pipeline, pipeline, pExecutableInfo->pipeline); 4334bf215546Sopenharmony_ci VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableStatisticKHR, out, 4335bf215546Sopenharmony_ci pStatistics, pStatisticCount); 4336bf215546Sopenharmony_ci 4337bf215546Sopenharmony_ci const struct tu_pipeline_executable *exe = 4338bf215546Sopenharmony_ci tu_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex); 4339bf215546Sopenharmony_ci 4340bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4341bf215546Sopenharmony_ci WRITE_STR(stat->name, "Max Waves Per Core"); 4342bf215546Sopenharmony_ci WRITE_STR(stat->description, 4343bf215546Sopenharmony_ci "Maximum number of simultaneous waves per core."); 4344bf215546Sopenharmony_ci stat->format = 
VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4345bf215546Sopenharmony_ci stat->value.u64 = exe->stats.max_waves; 4346bf215546Sopenharmony_ci } 4347bf215546Sopenharmony_ci 4348bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4349bf215546Sopenharmony_ci WRITE_STR(stat->name, "Instruction Count"); 4350bf215546Sopenharmony_ci WRITE_STR(stat->description, 4351bf215546Sopenharmony_ci "Total number of IR3 instructions in the final generated " 4352bf215546Sopenharmony_ci "shader executable."); 4353bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4354bf215546Sopenharmony_ci stat->value.u64 = exe->stats.instrs_count; 4355bf215546Sopenharmony_ci } 4356bf215546Sopenharmony_ci 4357bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4358bf215546Sopenharmony_ci WRITE_STR(stat->name, "Code size"); 4359bf215546Sopenharmony_ci WRITE_STR(stat->description, 4360bf215546Sopenharmony_ci "Total number of dwords in the final generated " 4361bf215546Sopenharmony_ci "shader executable."); 4362bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4363bf215546Sopenharmony_ci stat->value.u64 = exe->stats.sizedwords; 4364bf215546Sopenharmony_ci } 4365bf215546Sopenharmony_ci 4366bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4367bf215546Sopenharmony_ci WRITE_STR(stat->name, "NOPs Count"); 4368bf215546Sopenharmony_ci WRITE_STR(stat->description, 4369bf215546Sopenharmony_ci "Number of NOP instructions in the final generated " 4370bf215546Sopenharmony_ci "shader executable."); 4371bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4372bf215546Sopenharmony_ci stat->value.u64 = exe->stats.nops_count; 4373bf215546Sopenharmony_ci } 4374bf215546Sopenharmony_ci 4375bf215546Sopenharmony_ci 
vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4376bf215546Sopenharmony_ci WRITE_STR(stat->name, "MOV Count"); 4377bf215546Sopenharmony_ci WRITE_STR(stat->description, 4378bf215546Sopenharmony_ci "Number of MOV instructions in the final generated " 4379bf215546Sopenharmony_ci "shader executable."); 4380bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4381bf215546Sopenharmony_ci stat->value.u64 = exe->stats.mov_count; 4382bf215546Sopenharmony_ci } 4383bf215546Sopenharmony_ci 4384bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4385bf215546Sopenharmony_ci WRITE_STR(stat->name, "COV Count"); 4386bf215546Sopenharmony_ci WRITE_STR(stat->description, 4387bf215546Sopenharmony_ci "Number of COV instructions in the final generated " 4388bf215546Sopenharmony_ci "shader executable."); 4389bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4390bf215546Sopenharmony_ci stat->value.u64 = exe->stats.cov_count; 4391bf215546Sopenharmony_ci } 4392bf215546Sopenharmony_ci 4393bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4394bf215546Sopenharmony_ci WRITE_STR(stat->name, "Registers used"); 4395bf215546Sopenharmony_ci WRITE_STR(stat->description, 4396bf215546Sopenharmony_ci "Number of registers used in the final generated " 4397bf215546Sopenharmony_ci "shader executable."); 4398bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4399bf215546Sopenharmony_ci stat->value.u64 = exe->stats.max_reg + 1; 4400bf215546Sopenharmony_ci } 4401bf215546Sopenharmony_ci 4402bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4403bf215546Sopenharmony_ci WRITE_STR(stat->name, "Half-registers used"); 4404bf215546Sopenharmony_ci WRITE_STR(stat->description, 4405bf215546Sopenharmony_ci "Number of half-registers used in the 
final generated " 4406bf215546Sopenharmony_ci "shader executable."); 4407bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4408bf215546Sopenharmony_ci stat->value.u64 = exe->stats.max_half_reg + 1; 4409bf215546Sopenharmony_ci } 4410bf215546Sopenharmony_ci 4411bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4412bf215546Sopenharmony_ci WRITE_STR(stat->name, "Instructions with SS sync bit"); 4413bf215546Sopenharmony_ci WRITE_STR(stat->description, 4414bf215546Sopenharmony_ci "SS bit is set for instructions which depend on a result " 4415bf215546Sopenharmony_ci "of \"long\" instructions to prevent RAW hazard."); 4416bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4417bf215546Sopenharmony_ci stat->value.u64 = exe->stats.ss; 4418bf215546Sopenharmony_ci } 4419bf215546Sopenharmony_ci 4420bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4421bf215546Sopenharmony_ci WRITE_STR(stat->name, "Instructions with SY sync bit"); 4422bf215546Sopenharmony_ci WRITE_STR(stat->description, 4423bf215546Sopenharmony_ci "SY bit is set for instructions which depend on a result " 4424bf215546Sopenharmony_ci "of loads from global memory to prevent RAW hazard."); 4425bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4426bf215546Sopenharmony_ci stat->value.u64 = exe->stats.sy; 4427bf215546Sopenharmony_ci } 4428bf215546Sopenharmony_ci 4429bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4430bf215546Sopenharmony_ci WRITE_STR(stat->name, "Estimated cycles stalled on SS"); 4431bf215546Sopenharmony_ci WRITE_STR(stat->description, 4432bf215546Sopenharmony_ci "A better metric to estimate the impact of SS syncs."); 4433bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4434bf215546Sopenharmony_ci 
stat->value.u64 = exe->stats.sstall; 4435bf215546Sopenharmony_ci } 4436bf215546Sopenharmony_ci 4437bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4438bf215546Sopenharmony_ci WRITE_STR(stat->name, "Estimated cycles stalled on SY"); 4439bf215546Sopenharmony_ci WRITE_STR(stat->description, 4440bf215546Sopenharmony_ci "A better metric to estimate the impact of SY syncs."); 4441bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4442bf215546Sopenharmony_ci stat->value.u64 = exe->stats.systall; 4443bf215546Sopenharmony_ci } 4444bf215546Sopenharmony_ci 4445bf215546Sopenharmony_ci for (int i = 0; i < ARRAY_SIZE(exe->stats.instrs_per_cat); i++) { 4446bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4447bf215546Sopenharmony_ci WRITE_STR(stat->name, "cat%d instructions", i); 4448bf215546Sopenharmony_ci WRITE_STR(stat->description, 4449bf215546Sopenharmony_ci "Number of cat%d instructions.", i); 4450bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4451bf215546Sopenharmony_ci stat->value.u64 = exe->stats.instrs_per_cat[i]; 4452bf215546Sopenharmony_ci } 4453bf215546Sopenharmony_ci } 4454bf215546Sopenharmony_ci 4455bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 4456bf215546Sopenharmony_ci WRITE_STR(stat->name, "STP Count"); 4457bf215546Sopenharmony_ci WRITE_STR(stat->description, 4458bf215546Sopenharmony_ci "Number of STore Private instructions in the final generated " 4459bf215546Sopenharmony_ci "shader executable."); 4460bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4461bf215546Sopenharmony_ci stat->value.u64 = exe->stats.stp_count; 4462bf215546Sopenharmony_ci } 4463bf215546Sopenharmony_ci 4464bf215546Sopenharmony_ci vk_outarray_append_typed(VkPipelineExecutableStatisticKHR, &out, stat) { 
4465bf215546Sopenharmony_ci WRITE_STR(stat->name, "LDP Count"); 4466bf215546Sopenharmony_ci WRITE_STR(stat->description, 4467bf215546Sopenharmony_ci "Number of LoaD Private instructions in the final generated " 4468bf215546Sopenharmony_ci "shader executable."); 4469bf215546Sopenharmony_ci stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR; 4470bf215546Sopenharmony_ci stat->value.u64 = exe->stats.ldp_count; 4471bf215546Sopenharmony_ci } 4472bf215546Sopenharmony_ci 4473bf215546Sopenharmony_ci return vk_outarray_status(&out); 4474bf215546Sopenharmony_ci} 4475bf215546Sopenharmony_ci

/* Fill the data part of one internal-representation record with text.
 *
 * The record is always flagged as text. What happens next depends on the
 * caller-provided buffer:
 *
 *  - ir->pData == NULL: size query. ir->dataSize is set to the number of
 *    bytes needed for the whole string, terminator included.
 *  - buffer large enough: the whole string (with terminator) is copied and
 *    ir->dataSize is set to the number of bytes written.
 *  - buffer too small: as much of the string as fits is copied and
 *    ir->dataSize is left at the caller's value.
 *
 * \param ir    record to fill; pData/dataSize are read as the caller's
 *              buffer and capacity.
 * \param data  NUL-terminated text to hand out; must not be NULL.
 *
 * \return true if the complete string was reported or written, false if it
 *         had to be truncated.
 */
static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   /* Size of the complete string, including the terminating NUL. */
   const size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   if (ir->dataSize < data_len) {
      /* Truncated copy. strncpy() would fill the buffer to the brim and
       * leave it unterminated, but text representations are expected to be
       * NUL-terminated strings, so give up the last byte for a terminator.
       * A zero-sized buffer cannot hold even that and is left untouched.
       */
      if (ir->dataSize > 0) {
         char *dst = ir->pData;
         memcpy(dst, data, ir->dataSize - 1);
         dst[ir->dataSize - 1] = '\0';
      }
      return false;
   }

   /* Everything fits: copy exactly the string and its terminator rather
    * than touching the rest of the caller's buffer.
    */
   memcpy(ir->pData, data, data_len);
   ir->dataSize = data_len;
   return true;
}

/* Report the textual internal representations recorded for one pipeline
 * executable.
 *
 * Up to three records are appended to the output array, in this order and
 * only when the corresponding text exists on the executable: the NIR
 * obtained from SPIR-V, the final NIR and the IR3 disassembly.
 *
 * \return VK_INCOMPLETE if the text of any appended record had to be
 *         truncated, otherwise the status of the output array.
 */
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPipelineExecutableInternalRepresentationsKHR(
    VkDevice _device,
    const VkPipelineExecutableInfoKHR* pExecutableInfo,
    uint32_t* pInternalRepresentationCount,
    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
{
   TU_FROM_HANDLE(tu_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE_TYPED(VkPipelineExecutableInternalRepresentationKHR, out,
                          pInternalRepresentations, pInternalRepresentationCount);
   bool incomplete_text = false;

   const struct tu_pipeline_executable *exe =
      tu_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   /* One row per representation; a NULL text means it was not recorded. */
   const struct {
      const char *name;
      const char *description;
      const char *text;
   } irs[] = {
      { "NIR from SPIRV",
        "Initial NIR before any optimizations",
        exe->nir_from_spirv },
      { "Final NIR",
        "Final NIR before going into the back-end compiler",
        exe->nir_final },
      { "IR3 Assembly",
        "Final IR3 assembly for the generated shader binary",
        exe->disasm },
   };

   for (unsigned i = 0; i < ARRAY_SIZE(irs); i++) {
      if (!irs[i].text)
         continue;

      vk_outarray_append_typed(VkPipelineExecutableInternalRepresentationKHR, &out, ir) {
         WRITE_STR(ir->name, "%s", irs[i].name);
         WRITE_STR(ir->description, "%s", irs[i].description);

         if (!write_ir_text(ir, irs[i].text))
            incomplete_text = true;
      }
   }

   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}