1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2016 Red Hat. 3bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * based in part on anv driver which is: 6bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation 7bf215546Sopenharmony_ci * 8bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 9bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 10bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 11bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 13bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 16bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 17bf215546Sopenharmony_ci * Software. 18bf215546Sopenharmony_ci * 19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25bf215546Sopenharmony_ci * IN THE SOFTWARE. 26bf215546Sopenharmony_ci */ 27bf215546Sopenharmony_ci 28bf215546Sopenharmony_ci#ifndef RADV_SHADER_H 29bf215546Sopenharmony_ci#define RADV_SHADER_H 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci#include "ac_binary.h" 32bf215546Sopenharmony_ci#include "ac_shader_util.h" 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_ci#include "amd_family.h" 35bf215546Sopenharmony_ci#include "radv_constants.h" 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci#include "nir/nir.h" 38bf215546Sopenharmony_ci#include "vulkan/runtime/vk_object.h" 39bf215546Sopenharmony_ci#include "vulkan/runtime/vk_shader_module.h" 40bf215546Sopenharmony_ci#include "vulkan/vulkan.h" 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci#include "aco_shader_info.h" 43bf215546Sopenharmony_ci 44bf215546Sopenharmony_ci#define RADV_VERT_ATTRIB_MAX MAX2(VERT_ATTRIB_MAX, VERT_ATTRIB_GENERIC0 + MAX_VERTEX_ATTRIBS) 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_cistruct radv_physical_device; 47bf215546Sopenharmony_cistruct radv_device; 48bf215546Sopenharmony_cistruct radv_pipeline; 49bf215546Sopenharmony_cistruct radv_pipeline_cache; 50bf215546Sopenharmony_cistruct radv_pipeline_key; 51bf215546Sopenharmony_cistruct radv_shader_args; 52bf215546Sopenharmony_cistruct radv_vs_input_state; 53bf215546Sopenharmony_cistruct radv_shader_args; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_cienum radv_vs_input_alpha_adjust { 56bf215546Sopenharmony_ci ALPHA_ADJUST_NONE = 0, 57bf215546Sopenharmony_ci ALPHA_ADJUST_SNORM = 1, 58bf215546Sopenharmony_ci ALPHA_ADJUST_SSCALED = 2, 59bf215546Sopenharmony_ci ALPHA_ADJUST_SINT = 3, 60bf215546Sopenharmony_ci}; 61bf215546Sopenharmony_ci 62bf215546Sopenharmony_cistruct radv_pipeline_key { 63bf215546Sopenharmony_ci uint32_t has_multiview_view_index : 1; 64bf215546Sopenharmony_ci uint32_t optimisations_disabled : 1; 65bf215546Sopenharmony_ci uint32_t invariant_geom : 1; 66bf215546Sopenharmony_ci uint32_t use_ngg : 1; 67bf215546Sopenharmony_ci uint32_t adjust_frag_coord_z : 1; 68bf215546Sopenharmony_ci uint32_t disable_aniso_single_level : 1; 69bf215546Sopenharmony_ci uint32_t disable_sinking_load_input_fs : 1; 70bf215546Sopenharmony_ci uint32_t image_2d_view_of_3d : 1; 71bf215546Sopenharmony_ci uint32_t primitives_generated_query : 1; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci struct { 74bf215546Sopenharmony_ci uint32_t instance_rate_inputs; 75bf215546Sopenharmony_ci uint32_t instance_rate_divisors[MAX_VERTEX_ATTRIBS]; 76bf215546Sopenharmony_ci uint8_t vertex_attribute_formats[MAX_VERTEX_ATTRIBS]; 77bf215546Sopenharmony_ci uint32_t vertex_attribute_bindings[MAX_VERTEX_ATTRIBS]; 78bf215546Sopenharmony_ci uint32_t vertex_attribute_offsets[MAX_VERTEX_ATTRIBS]; 79bf215546Sopenharmony_ci uint32_t vertex_attribute_strides[MAX_VERTEX_ATTRIBS]; 80bf215546Sopenharmony_ci uint8_t vertex_binding_align[MAX_VBS]; 81bf215546Sopenharmony_ci enum radv_vs_input_alpha_adjust vertex_alpha_adjust[MAX_VERTEX_ATTRIBS]; 82bf215546Sopenharmony_ci uint32_t vertex_post_shuffle; 83bf215546Sopenharmony_ci uint32_t provoking_vtx_last : 1; 84bf215546Sopenharmony_ci uint32_t dynamic_input_state : 1; 85bf215546Sopenharmony_ci uint8_t topology; 86bf215546Sopenharmony_ci } vs; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci struct { 89bf215546Sopenharmony_ci unsigned tess_input_vertices; 90bf215546Sopenharmony_ci } tcs; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci struct { 93bf215546Sopenharmony_ci uint32_t col_format; 94bf215546Sopenharmony_ci uint32_t is_int8; 95bf215546Sopenharmony_ci uint32_t is_int10; 96bf215546Sopenharmony_ci uint32_t cb_target_mask; 97bf215546Sopenharmony_ci uint8_t log2_ps_iter_samples; 98bf215546Sopenharmony_ci uint8_t num_samples; 99bf215546Sopenharmony_ci bool mrt0_is_dual_src; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_ci bool lower_discard_to_demote; 102bf215546Sopenharmony_ci uint8_t enable_mrt_output_nan_fixup; 103bf215546Sopenharmony_ci bool force_vrs_enabled; 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci /* Used to export alpha through MRTZ for alpha-to-coverage (GFX11+). */ 106bf215546Sopenharmony_ci bool alpha_to_coverage_via_mrtz; 107bf215546Sopenharmony_ci 108bf215546Sopenharmony_ci bool has_epilog; 109bf215546Sopenharmony_ci } ps; 110bf215546Sopenharmony_ci 111bf215546Sopenharmony_ci struct { 112bf215546Sopenharmony_ci /* Non-zero if a required subgroup size is specified via 113bf215546Sopenharmony_ci * VK_EXT_subgroup_size_control. 114bf215546Sopenharmony_ci */ 115bf215546Sopenharmony_ci uint8_t compute_subgroup_size; 116bf215546Sopenharmony_ci bool require_full_subgroups; 117bf215546Sopenharmony_ci } cs; 118bf215546Sopenharmony_ci}; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_cistruct radv_nir_compiler_options { 121bf215546Sopenharmony_ci struct radv_pipeline_key key; 122bf215546Sopenharmony_ci bool robust_buffer_access; 123bf215546Sopenharmony_ci bool dump_shader; 124bf215546Sopenharmony_ci bool dump_preoptir; 125bf215546Sopenharmony_ci bool record_ir; 126bf215546Sopenharmony_ci bool record_stats; 127bf215546Sopenharmony_ci bool check_ir; 128bf215546Sopenharmony_ci bool has_ls_vgpr_init_bug; 129bf215546Sopenharmony_ci uint8_t enable_mrt_output_nan_fixup; 130bf215546Sopenharmony_ci bool wgp_mode; 131bf215546Sopenharmony_ci enum radeon_family family; 132bf215546Sopenharmony_ci enum amd_gfx_level gfx_level; 133bf215546Sopenharmony_ci uint32_t address32_hi; 134bf215546Sopenharmony_ci bool has_3d_cube_border_color_mipmap; 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci struct { 137bf215546Sopenharmony_ci void (*func)(void *private_data, enum aco_compiler_debug_level level, const char *message); 138bf215546Sopenharmony_ci void *private_data; 139bf215546Sopenharmony_ci } debug; 140bf215546Sopenharmony_ci}; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_cienum radv_ud_index { 143bf215546Sopenharmony_ci AC_UD_SCRATCH_RING_OFFSETS = 0, 144bf215546Sopenharmony_ci AC_UD_PUSH_CONSTANTS = 1, 145bf215546Sopenharmony_ci AC_UD_INLINE_PUSH_CONSTANTS = 2, 146bf215546Sopenharmony_ci AC_UD_INDIRECT_DESCRIPTOR_SETS = 3, 147bf215546Sopenharmony_ci AC_UD_VIEW_INDEX = 4, 148bf215546Sopenharmony_ci AC_UD_STREAMOUT_BUFFERS = 5, 149bf215546Sopenharmony_ci AC_UD_NGG_QUERY_STATE = 6, 150bf215546Sopenharmony_ci AC_UD_NGG_CULLING_SETTINGS = 7, 151bf215546Sopenharmony_ci AC_UD_NGG_VIEWPORT = 8, 152bf215546Sopenharmony_ci AC_UD_FORCE_VRS_RATES = 9, 153bf215546Sopenharmony_ci AC_UD_TASK_RING_ENTRY = 10, 154bf215546Sopenharmony_ci AC_UD_SHADER_START = 11, 155bf215546Sopenharmony_ci AC_UD_VS_VERTEX_BUFFERS = AC_UD_SHADER_START, 156bf215546Sopenharmony_ci AC_UD_VS_BASE_VERTEX_START_INSTANCE, 157bf215546Sopenharmony_ci AC_UD_VS_PROLOG_INPUTS, 158bf215546Sopenharmony_ci AC_UD_VS_MAX_UD, 159bf215546Sopenharmony_ci AC_UD_PS_EPILOG_PC, 160bf215546Sopenharmony_ci AC_UD_PS_MAX_UD, 161bf215546Sopenharmony_ci AC_UD_CS_GRID_SIZE = AC_UD_SHADER_START, 162bf215546Sopenharmony_ci AC_UD_CS_SBT_DESCRIPTORS, 163bf215546Sopenharmony_ci AC_UD_CS_RAY_LAUNCH_SIZE_ADDR, 164bf215546Sopenharmony_ci AC_UD_CS_TASK_RING_OFFSETS, 165bf215546Sopenharmony_ci AC_UD_CS_TASK_DRAW_ID, 166bf215546Sopenharmony_ci AC_UD_CS_TASK_IB, 167bf215546Sopenharmony_ci AC_UD_CS_MAX_UD, 168bf215546Sopenharmony_ci AC_UD_GS_MAX_UD, 169bf215546Sopenharmony_ci AC_UD_TCS_MAX_UD, 170bf215546Sopenharmony_ci AC_UD_TES_MAX_UD, 171bf215546Sopenharmony_ci AC_UD_MAX_UD = AC_UD_TCS_MAX_UD, 172bf215546Sopenharmony_ci}; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_cistruct radv_stream_output { 175bf215546Sopenharmony_ci uint8_t location; 176bf215546Sopenharmony_ci uint8_t buffer; 177bf215546Sopenharmony_ci uint16_t offset; 178bf215546Sopenharmony_ci uint8_t component_mask; 179bf215546Sopenharmony_ci uint8_t stream; 180bf215546Sopenharmony_ci}; 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_cistruct radv_streamout_info { 183bf215546Sopenharmony_ci uint16_t num_outputs; 184bf215546Sopenharmony_ci struct radv_stream_output outputs[MAX_SO_OUTPUTS]; 185bf215546Sopenharmony_ci uint16_t strides[MAX_SO_BUFFERS]; 186bf215546Sopenharmony_ci uint32_t enabled_stream_buffers_mask; 187bf215546Sopenharmony_ci}; 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_cistruct radv_userdata_info { 190bf215546Sopenharmony_ci int8_t sgpr_idx; 191bf215546Sopenharmony_ci uint8_t num_sgprs; 192bf215546Sopenharmony_ci}; 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_cistruct radv_userdata_locations { 195bf215546Sopenharmony_ci struct radv_userdata_info descriptor_sets[MAX_SETS]; 196bf215546Sopenharmony_ci struct radv_userdata_info shader_data[AC_UD_MAX_UD]; 197bf215546Sopenharmony_ci uint32_t descriptor_sets_enabled; 198bf215546Sopenharmony_ci}; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_cistruct radv_vs_output_info { 201bf215546Sopenharmony_ci uint8_t vs_output_param_offset[VARYING_SLOT_MAX]; 202bf215546Sopenharmony_ci uint8_t clip_dist_mask; 203bf215546Sopenharmony_ci uint8_t cull_dist_mask; 204bf215546Sopenharmony_ci uint8_t param_exports; 205bf215546Sopenharmony_ci uint8_t prim_param_exports; 206bf215546Sopenharmony_ci bool writes_pointsize; 207bf215546Sopenharmony_ci bool writes_layer; 208bf215546Sopenharmony_ci bool writes_layer_per_primitive; 209bf215546Sopenharmony_ci bool writes_viewport_index; 210bf215546Sopenharmony_ci bool writes_viewport_index_per_primitive; 211bf215546Sopenharmony_ci bool writes_primitive_shading_rate; 212bf215546Sopenharmony_ci bool writes_primitive_shading_rate_per_primitive; 213bf215546Sopenharmony_ci bool export_prim_id; 214bf215546Sopenharmony_ci bool export_clip_dists; 215bf215546Sopenharmony_ci unsigned pos_exports; 216bf215546Sopenharmony_ci}; 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_cistruct radv_es_output_info { 219bf215546Sopenharmony_ci uint32_t esgs_itemsize; 220bf215546Sopenharmony_ci}; 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_cistruct gfx9_gs_info { 223bf215546Sopenharmony_ci uint32_t vgt_gs_onchip_cntl; 224bf215546Sopenharmony_ci uint32_t vgt_gs_max_prims_per_subgroup; 225bf215546Sopenharmony_ci uint32_t vgt_esgs_ring_itemsize; 226bf215546Sopenharmony_ci uint32_t lds_size; 227bf215546Sopenharmony_ci}; 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_cistruct gfx10_ngg_info { 230bf215546Sopenharmony_ci uint16_t ngg_emit_size; /* in dwords */ 231bf215546Sopenharmony_ci uint32_t hw_max_esverts; 232bf215546Sopenharmony_ci uint32_t max_gsprims; 233bf215546Sopenharmony_ci uint32_t max_out_verts; 234bf215546Sopenharmony_ci uint32_t prim_amp_factor; 235bf215546Sopenharmony_ci uint32_t vgt_esgs_ring_itemsize; 236bf215546Sopenharmony_ci uint32_t esgs_ring_size; 237bf215546Sopenharmony_ci bool max_vert_out_per_gs_instance; 238bf215546Sopenharmony_ci bool enable_vertex_grouping; 239bf215546Sopenharmony_ci}; 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_cistruct radv_shader_info { 242bf215546Sopenharmony_ci uint64_t inline_push_constant_mask; 243bf215546Sopenharmony_ci bool can_inline_all_push_constants; 244bf215546Sopenharmony_ci bool loads_push_constants; 245bf215546Sopenharmony_ci bool loads_dynamic_offsets; 246bf215546Sopenharmony_ci uint32_t desc_set_used_mask; 247bf215546Sopenharmony_ci bool uses_view_index; 248bf215546Sopenharmony_ci bool uses_invocation_id; 249bf215546Sopenharmony_ci bool uses_prim_id; 250bf215546Sopenharmony_ci uint8_t wave_size; 251bf215546Sopenharmony_ci uint8_t ballot_bit_size; 252bf215546Sopenharmony_ci struct radv_userdata_locations user_sgprs_locs; 253bf215546Sopenharmony_ci bool is_ngg; 254bf215546Sopenharmony_ci bool is_ngg_passthrough; 255bf215546Sopenharmony_ci bool has_ngg_culling; 256bf215546Sopenharmony_ci bool has_ngg_early_prim_export; 257bf215546Sopenharmony_ci uint32_t num_lds_blocks_when_not_culling; 258bf215546Sopenharmony_ci uint32_t num_tess_patches; 259bf215546Sopenharmony_ci unsigned workgroup_size; 260bf215546Sopenharmony_ci bool force_vrs_per_vertex; 261bf215546Sopenharmony_ci struct { 262bf215546Sopenharmony_ci uint8_t input_usage_mask[RADV_VERT_ATTRIB_MAX]; 263bf215546Sopenharmony_ci uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 264bf215546Sopenharmony_ci bool needs_draw_id; 265bf215546Sopenharmony_ci bool needs_instance_id; 266bf215546Sopenharmony_ci struct radv_vs_output_info outinfo; 267bf215546Sopenharmony_ci struct radv_es_output_info es_info; 268bf215546Sopenharmony_ci bool as_es; 269bf215546Sopenharmony_ci bool as_ls; 270bf215546Sopenharmony_ci bool tcs_in_out_eq; 271bf215546Sopenharmony_ci uint64_t tcs_temp_only_input_mask; 272bf215546Sopenharmony_ci uint8_t num_linked_outputs; 273bf215546Sopenharmony_ci bool needs_base_instance; 274bf215546Sopenharmony_ci bool use_per_attribute_vb_descs; 275bf215546Sopenharmony_ci uint32_t vb_desc_usage_mask; 276bf215546Sopenharmony_ci bool has_prolog; 277bf215546Sopenharmony_ci bool dynamic_inputs; 278bf215546Sopenharmony_ci } vs; 279bf215546Sopenharmony_ci struct { 280bf215546Sopenharmony_ci uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 281bf215546Sopenharmony_ci uint8_t num_stream_output_components[4]; 282bf215546Sopenharmony_ci uint8_t output_streams[VARYING_SLOT_VAR31 + 1]; 283bf215546Sopenharmony_ci uint8_t max_stream; 284bf215546Sopenharmony_ci unsigned gsvs_vertex_size; 285bf215546Sopenharmony_ci unsigned max_gsvs_emit_size; 286bf215546Sopenharmony_ci unsigned vertices_in; 287bf215546Sopenharmony_ci unsigned vertices_out; 288bf215546Sopenharmony_ci unsigned output_prim; 289bf215546Sopenharmony_ci unsigned invocations; 290bf215546Sopenharmony_ci unsigned es_type; /* GFX9: VS or TES */ 291bf215546Sopenharmony_ci uint8_t num_linked_inputs; 292bf215546Sopenharmony_ci } gs; 293bf215546Sopenharmony_ci struct { 294bf215546Sopenharmony_ci uint8_t output_usage_mask[VARYING_SLOT_VAR31 + 1]; 295bf215546Sopenharmony_ci struct radv_vs_output_info outinfo; 296bf215546Sopenharmony_ci struct radv_es_output_info es_info; 297bf215546Sopenharmony_ci bool as_es; 298bf215546Sopenharmony_ci enum tess_primitive_mode _primitive_mode; 299bf215546Sopenharmony_ci enum gl_tess_spacing spacing; 300bf215546Sopenharmony_ci bool ccw; 301bf215546Sopenharmony_ci bool point_mode; 302bf215546Sopenharmony_ci uint8_t num_linked_inputs; 303bf215546Sopenharmony_ci uint8_t num_linked_patch_inputs; 304bf215546Sopenharmony_ci uint8_t num_linked_outputs; 305bf215546Sopenharmony_ci } tes; 306bf215546Sopenharmony_ci struct { 307bf215546Sopenharmony_ci bool uses_sample_shading; 308bf215546Sopenharmony_ci bool needs_sample_positions; 309bf215546Sopenharmony_ci bool writes_memory; 310bf215546Sopenharmony_ci bool writes_z; 311bf215546Sopenharmony_ci bool writes_stencil; 312bf215546Sopenharmony_ci bool writes_sample_mask; 313bf215546Sopenharmony_ci bool has_pcoord; 314bf215546Sopenharmony_ci bool prim_id_input; 315bf215546Sopenharmony_ci bool layer_input; 316bf215546Sopenharmony_ci bool viewport_index_input; 317bf215546Sopenharmony_ci uint8_t num_input_clips_culls; 318bf215546Sopenharmony_ci uint32_t input_mask; 319bf215546Sopenharmony_ci uint32_t input_per_primitive_mask; 320bf215546Sopenharmony_ci uint32_t flat_shaded_mask; 321bf215546Sopenharmony_ci uint32_t explicit_shaded_mask; 322bf215546Sopenharmony_ci uint32_t float16_shaded_mask; 323bf215546Sopenharmony_ci uint32_t num_interp; 324bf215546Sopenharmony_ci uint32_t num_prim_interp; 325bf215546Sopenharmony_ci bool can_discard; 326bf215546Sopenharmony_ci bool early_fragment_test; 327bf215546Sopenharmony_ci bool post_depth_coverage; 328bf215546Sopenharmony_ci bool reads_sample_mask_in; 329bf215546Sopenharmony_ci bool reads_front_face; 330bf215546Sopenharmony_ci bool reads_sample_id; 331bf215546Sopenharmony_ci bool reads_frag_shading_rate; 332bf215546Sopenharmony_ci bool reads_barycentric_model; 333bf215546Sopenharmony_ci bool reads_persp_sample; 334bf215546Sopenharmony_ci bool reads_persp_center; 335bf215546Sopenharmony_ci bool reads_persp_centroid; 336bf215546Sopenharmony_ci bool reads_linear_sample; 337bf215546Sopenharmony_ci bool reads_linear_center; 338bf215546Sopenharmony_ci bool reads_linear_centroid; 339bf215546Sopenharmony_ci uint8_t reads_frag_coord_mask; 340bf215546Sopenharmony_ci uint8_t reads_sample_pos_mask; 341bf215546Sopenharmony_ci uint8_t depth_layout; 342bf215546Sopenharmony_ci bool allow_flat_shading; 343bf215546Sopenharmony_ci bool has_epilog; 344bf215546Sopenharmony_ci unsigned spi_ps_input; 345bf215546Sopenharmony_ci } ps; 346bf215546Sopenharmony_ci struct { 347bf215546Sopenharmony_ci bool uses_grid_size; 348bf215546Sopenharmony_ci bool uses_block_id[3]; 349bf215546Sopenharmony_ci bool uses_thread_id[3]; 350bf215546Sopenharmony_ci bool uses_local_invocation_idx; 351bf215546Sopenharmony_ci unsigned block_size[3]; 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci uint8_t subgroup_size; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci bool uses_sbt; 356bf215546Sopenharmony_ci bool uses_ray_launch_size; 357bf215546Sopenharmony_ci bool uses_task_rings; 358bf215546Sopenharmony_ci } cs; 359bf215546Sopenharmony_ci struct { 360bf215546Sopenharmony_ci uint64_t tes_inputs_read; 361bf215546Sopenharmony_ci uint64_t tes_patch_inputs_read; 362bf215546Sopenharmony_ci unsigned tcs_vertices_out; 363bf215546Sopenharmony_ci uint32_t num_lds_blocks; 364bf215546Sopenharmony_ci uint8_t num_linked_inputs; 365bf215546Sopenharmony_ci uint8_t num_linked_outputs; 366bf215546Sopenharmony_ci uint8_t num_linked_patch_outputs; 367bf215546Sopenharmony_ci bool tes_reads_tess_factors : 1; 368bf215546Sopenharmony_ci } tcs; 369bf215546Sopenharmony_ci struct { 370bf215546Sopenharmony_ci struct radv_vs_output_info outinfo; 371bf215546Sopenharmony_ci enum shader_prim output_prim; 372bf215546Sopenharmony_ci bool needs_ms_scratch_ring; 373bf215546Sopenharmony_ci } ms; 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci struct radv_streamout_info so; 376bf215546Sopenharmony_ci 377bf215546Sopenharmony_ci struct gfx9_gs_info gs_ring_info; 378bf215546Sopenharmony_ci struct gfx10_ngg_info ngg_info; 379bf215546Sopenharmony_ci}; 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_cistruct radv_vs_input_state { 382bf215546Sopenharmony_ci uint32_t attribute_mask; 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci uint32_t instance_rate_inputs; 385bf215546Sopenharmony_ci uint32_t nontrivial_divisors; 386bf215546Sopenharmony_ci uint32_t zero_divisors; 387bf215546Sopenharmony_ci uint32_t post_shuffle; 388bf215546Sopenharmony_ci /* Having two separate fields instead of a single uint64_t makes it easier to remove attributes 389bf215546Sopenharmony_ci * using bitwise arithmetic. 390bf215546Sopenharmony_ci */ 391bf215546Sopenharmony_ci uint32_t alpha_adjust_lo; 392bf215546Sopenharmony_ci uint32_t alpha_adjust_hi; 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci uint8_t bindings[MAX_VERTEX_ATTRIBS]; 395bf215546Sopenharmony_ci uint32_t divisors[MAX_VERTEX_ATTRIBS]; 396bf215546Sopenharmony_ci uint32_t offsets[MAX_VERTEX_ATTRIBS]; 397bf215546Sopenharmony_ci uint8_t formats[MAX_VERTEX_ATTRIBS]; 398bf215546Sopenharmony_ci uint8_t format_align_req_minus_1[MAX_VERTEX_ATTRIBS]; 399bf215546Sopenharmony_ci uint8_t format_sizes[MAX_VERTEX_ATTRIBS]; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci bool bindings_match_attrib; 402bf215546Sopenharmony_ci}; 403bf215546Sopenharmony_ci 404bf215546Sopenharmony_cistruct radv_vs_prolog_key { 405bf215546Sopenharmony_ci const struct radv_vs_input_state *state; 406bf215546Sopenharmony_ci unsigned num_attributes; 407bf215546Sopenharmony_ci uint32_t misaligned_mask; 408bf215546Sopenharmony_ci bool as_ls; 409bf215546Sopenharmony_ci bool is_ngg; 410bf215546Sopenharmony_ci bool wave32; 411bf215546Sopenharmony_ci gl_shader_stage next_stage; 412bf215546Sopenharmony_ci}; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_cistruct radv_ps_epilog_key { 415bf215546Sopenharmony_ci uint32_t spi_shader_col_format; 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci /* Bitmasks, each bit represents one of the 8 MRTs. */ 418bf215546Sopenharmony_ci uint8_t color_is_int8; 419bf215546Sopenharmony_ci uint8_t color_is_int10; 420bf215546Sopenharmony_ci uint8_t enable_mrt_output_nan_fixup; 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_ci bool wave32; 423bf215546Sopenharmony_ci}; 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_cienum radv_shader_binary_type { RADV_BINARY_TYPE_LEGACY, RADV_BINARY_TYPE_RTLD }; 426bf215546Sopenharmony_ci 427bf215546Sopenharmony_cistruct radv_shader_binary { 428bf215546Sopenharmony_ci enum radv_shader_binary_type type; 429bf215546Sopenharmony_ci gl_shader_stage stage; 430bf215546Sopenharmony_ci bool is_gs_copy_shader; 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci struct ac_shader_config config; 433bf215546Sopenharmony_ci struct radv_shader_info info; 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci /* Self-referential size so we avoid consistency issues. */ 436bf215546Sopenharmony_ci uint32_t total_size; 437bf215546Sopenharmony_ci}; 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_cistruct radv_shader_binary_legacy { 440bf215546Sopenharmony_ci struct radv_shader_binary base; 441bf215546Sopenharmony_ci unsigned code_size; 442bf215546Sopenharmony_ci unsigned exec_size; 443bf215546Sopenharmony_ci unsigned ir_size; 444bf215546Sopenharmony_ci unsigned disasm_size; 445bf215546Sopenharmony_ci unsigned stats_size; 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci /* data has size of stats_size + code_size + ir_size + disasm_size + 2, 448bf215546Sopenharmony_ci * where the +2 is for 0 of the ir strings. */ 449bf215546Sopenharmony_ci uint8_t data[0]; 450bf215546Sopenharmony_ci}; 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_cistruct radv_shader_binary_rtld { 453bf215546Sopenharmony_ci struct radv_shader_binary base; 454bf215546Sopenharmony_ci unsigned elf_size; 455bf215546Sopenharmony_ci unsigned llvm_ir_size; 456bf215546Sopenharmony_ci uint8_t data[0]; 457bf215546Sopenharmony_ci}; 458bf215546Sopenharmony_ci 459bf215546Sopenharmony_cistruct radv_shader_part_binary { 460bf215546Sopenharmony_ci uint8_t num_sgprs; 461bf215546Sopenharmony_ci uint8_t num_vgprs; 462bf215546Sopenharmony_ci uint8_t num_preserved_sgprs; 463bf215546Sopenharmony_ci unsigned code_size; 464bf215546Sopenharmony_ci unsigned disasm_size; 465bf215546Sopenharmony_ci uint8_t data[0]; 466bf215546Sopenharmony_ci}; 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_cistruct radv_shader_arena { 469bf215546Sopenharmony_ci struct list_head list; 470bf215546Sopenharmony_ci struct list_head entries; 471bf215546Sopenharmony_ci struct radeon_winsys_bo *bo; 472bf215546Sopenharmony_ci char *ptr; 473bf215546Sopenharmony_ci}; 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ciunion radv_shader_arena_block { 476bf215546Sopenharmony_ci struct list_head pool; 477bf215546Sopenharmony_ci struct { 478bf215546Sopenharmony_ci /* List of blocks in the arena, sorted by address. */ 479bf215546Sopenharmony_ci struct list_head list; 480bf215546Sopenharmony_ci /* For holes, a list_head for the free-list. For allocations, freelist.prev=NULL and 481bf215546Sopenharmony_ci * freelist.next is a pointer associated with the allocation. 482bf215546Sopenharmony_ci */ 483bf215546Sopenharmony_ci struct list_head freelist; 484bf215546Sopenharmony_ci struct radv_shader_arena *arena; 485bf215546Sopenharmony_ci uint32_t offset; 486bf215546Sopenharmony_ci uint32_t size; 487bf215546Sopenharmony_ci }; 488bf215546Sopenharmony_ci}; 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_cistruct radv_shader { 491bf215546Sopenharmony_ci uint32_t ref_count; 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci uint64_t va; 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci struct ac_shader_config config; 496bf215546Sopenharmony_ci uint8_t *code_ptr; 497bf215546Sopenharmony_ci uint32_t code_size; 498bf215546Sopenharmony_ci uint32_t exec_size; 499bf215546Sopenharmony_ci struct radv_shader_info info; 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci /* debug only */ 502bf215546Sopenharmony_ci char *spirv; 503bf215546Sopenharmony_ci uint32_t spirv_size; 504bf215546Sopenharmony_ci char *nir_string; 505bf215546Sopenharmony_ci char *disasm_string; 506bf215546Sopenharmony_ci char *ir_string; 507bf215546Sopenharmony_ci uint32_t *statistics; 508bf215546Sopenharmony_ci}; 509bf215546Sopenharmony_ci 510bf215546Sopenharmony_cistruct radv_trap_handler_shader { 511bf215546Sopenharmony_ci struct radeon_winsys_bo *bo; 512bf215546Sopenharmony_ci union radv_shader_arena_block *alloc; 513bf215546Sopenharmony_ci}; 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_cistruct radv_shader_part { 516bf215546Sopenharmony_ci struct radeon_winsys_bo *bo; 517bf215546Sopenharmony_ci union radv_shader_arena_block *alloc; 518bf215546Sopenharmony_ci uint32_t rsrc1; 519bf215546Sopenharmony_ci uint8_t num_preserved_sgprs; 520bf215546Sopenharmony_ci bool nontrivial_divisors; 521bf215546Sopenharmony_ci 522bf215546Sopenharmony_ci /* debug only */ 523bf215546Sopenharmony_ci char *disasm_string; 524bf215546Sopenharmony_ci}; 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_cistruct radv_pipeline_layout; 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_civoid radv_optimize_nir(struct nir_shader *shader, bool optimize_conservatively, bool allow_copies); 529bf215546Sopenharmony_civoid radv_optimize_nir_algebraic(nir_shader *shader, bool opt_offsets); 530bf215546Sopenharmony_cibool radv_nir_lower_ycbcr_textures(nir_shader *shader, const struct radv_pipeline_layout *layout); 531bf215546Sopenharmony_ci 532bf215546Sopenharmony_cibool radv_nir_lower_ray_queries(nir_shader *shader, struct radv_device *device); 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_civoid radv_nir_apply_pipeline_layout(nir_shader *shader, struct radv_device *device, 535bf215546Sopenharmony_ci const struct radv_pipeline_layout *layout, 536bf215546Sopenharmony_ci const struct radv_shader_info *info, 537bf215546Sopenharmony_ci const struct radv_shader_args *args); 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_cistruct radv_pipeline_stage; 540bf215546Sopenharmony_ci 541bf215546Sopenharmony_cinir_shader *radv_shader_spirv_to_nir(struct radv_device *device, 542bf215546Sopenharmony_ci const struct radv_pipeline_stage *stage, 543bf215546Sopenharmony_ci const struct radv_pipeline_key *key); 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_civoid radv_nir_lower_abi(nir_shader *shader, enum amd_gfx_level gfx_level, 546bf215546Sopenharmony_ci const struct radv_shader_info *info, const struct radv_shader_args *args, 547bf215546Sopenharmony_ci const struct radv_pipeline_key *pl_key, bool use_llvm); 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_civoid radv_init_shader_arenas(struct radv_device *device); 550bf215546Sopenharmony_civoid radv_destroy_shader_arenas(struct radv_device *device); 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_cistruct radv_pipeline_shader_stack_size; 553bf215546Sopenharmony_ci 554bf215546Sopenharmony_ciVkResult radv_create_shaders(struct radv_pipeline *pipeline, 555bf215546Sopenharmony_ci struct radv_pipeline_layout *pipeline_layout, 556bf215546Sopenharmony_ci struct radv_device *device, struct radv_pipeline_cache *cache, 557bf215546Sopenharmony_ci const struct radv_pipeline_key *key, 558bf215546Sopenharmony_ci const VkPipelineShaderStageCreateInfo *pStages, 559bf215546Sopenharmony_ci uint32_t stageCount, 560bf215546Sopenharmony_ci const VkPipelineCreateFlags flags, const uint8_t *custom_hash, 561bf215546Sopenharmony_ci const VkPipelineCreationFeedbackCreateInfo *creation_feedback, 562bf215546Sopenharmony_ci struct radv_pipeline_shader_stack_size **stack_sizes, 563bf215546Sopenharmony_ci uint32_t *num_stack_sizes, 564bf215546Sopenharmony_ci gl_shader_stage *last_vgt_api_stage); 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_cistruct radv_shader_args; 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_cistruct radv_shader *radv_shader_create(struct radv_device *device, 569bf215546Sopenharmony_ci const struct radv_shader_binary *binary, 570bf215546Sopenharmony_ci bool keep_shader_info, bool from_cache, 571bf215546Sopenharmony_ci const struct radv_shader_args *args); 572bf215546Sopenharmony_cistruct radv_shader *radv_shader_nir_to_asm( 573bf215546Sopenharmony_ci struct radv_device *device, struct radv_pipeline_stage *stage, struct nir_shader *const *shaders, 574bf215546Sopenharmony_ci int shader_count, const struct radv_pipeline_key *key, bool keep_shader_info, bool keep_statistic_info, 575bf215546Sopenharmony_ci struct radv_shader_binary **binary_out); 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_cibool radv_shader_binary_upload(struct radv_device *device, const struct radv_shader_binary *binary, 578bf215546Sopenharmony_ci struct radv_shader *shader, void *dest_ptr); 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ciunion radv_shader_arena_block *radv_alloc_shader_memory(struct radv_device *device, uint32_t size, 581bf215546Sopenharmony_ci void *ptr); 582bf215546Sopenharmony_civoid radv_free_shader_memory(struct radv_device *device, union radv_shader_arena_block *alloc); 583bf215546Sopenharmony_ci 584bf215546Sopenharmony_cistruct radv_shader * 585bf215546Sopenharmony_ciradv_create_gs_copy_shader(struct radv_device *device, struct nir_shader *nir, 586bf215546Sopenharmony_ci struct radv_shader_info *info, const struct radv_shader_args *args, 587bf215546Sopenharmony_ci struct radv_shader_binary **binary_out, 588bf215546Sopenharmony_ci bool keep_shader_info, bool keep_statistic_info, 589bf215546Sopenharmony_ci bool disable_optimizations); 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_cistruct radv_trap_handler_shader *radv_create_trap_handler_shader(struct radv_device *device); 592bf215546Sopenharmony_ciuint64_t radv_trap_handler_shader_get_va(const struct radv_trap_handler_shader *trap); 593bf215546Sopenharmony_civoid radv_trap_handler_shader_destroy(struct radv_device *device, 594bf215546Sopenharmony_ci struct radv_trap_handler_shader *trap); 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_cistruct radv_shader_part *radv_create_vs_prolog(struct radv_device *device, 597bf215546Sopenharmony_ci const struct radv_vs_prolog_key *key); 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_cistruct radv_shader_part *radv_create_ps_epilog(struct radv_device *device, 600bf215546Sopenharmony_ci const struct radv_ps_epilog_key *key); 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_civoid radv_shader_destroy(struct radv_device *device, struct radv_shader *shader); 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_civoid radv_shader_part_destroy(struct radv_device *device, struct radv_shader_part *shader_part); 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_ciuint64_t radv_shader_get_va(const struct radv_shader *shader); 607bf215546Sopenharmony_cistruct radv_shader *radv_find_shader(struct radv_device *device, uint64_t pc); 608bf215546Sopenharmony_ci 609bf215546Sopenharmony_ciunsigned radv_get_max_waves(const struct radv_device *device, struct radv_shader *shader, 610bf215546Sopenharmony_ci gl_shader_stage stage); 611bf215546Sopenharmony_ci 612bf215546Sopenharmony_ciconst char *radv_get_shader_name(const struct radv_shader_info *info, gl_shader_stage stage); 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ciunsigned radv_compute_spi_ps_input(const struct radv_pipeline_key *pipeline_key, 615bf215546Sopenharmony_ci const struct radv_shader_info *info); 616bf215546Sopenharmony_ci 617bf215546Sopenharmony_cibool radv_can_dump_shader(struct radv_device *device, nir_shader *nir, bool meta_shader); 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_cibool radv_can_dump_shader_stats(struct radv_device *device, nir_shader *nir); 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ciVkResult radv_dump_shader_stats(struct radv_device *device, struct radv_pipeline *pipeline, 622bf215546Sopenharmony_ci gl_shader_stage stage, FILE *output); 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_cistatic inline unsigned 625bf215546Sopenharmony_cicalculate_tess_lds_size(enum amd_gfx_level gfx_level, unsigned tcs_num_input_vertices, 626bf215546Sopenharmony_ci unsigned tcs_num_output_vertices, unsigned tcs_num_inputs, 627bf215546Sopenharmony_ci unsigned tcs_num_patches, unsigned tcs_num_outputs, 628bf215546Sopenharmony_ci unsigned tcs_num_patch_outputs) 629bf215546Sopenharmony_ci{ 630bf215546Sopenharmony_ci unsigned input_vertex_size = tcs_num_inputs * 16; 631bf215546Sopenharmony_ci unsigned output_vertex_size = tcs_num_outputs * 16; 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci unsigned input_patch_size = tcs_num_input_vertices * input_vertex_size; 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci unsigned pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size; 636bf215546Sopenharmony_ci unsigned output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16; 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci unsigned output_patch0_offset = input_patch_size * tcs_num_patches; 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci unsigned lds_size = output_patch0_offset + output_patch_size * tcs_num_patches; 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci if (gfx_level >= GFX7) { 643bf215546Sopenharmony_ci assert(lds_size <= 65536); 644bf215546Sopenharmony_ci lds_size = align(lds_size, 512) / 512; 645bf215546Sopenharmony_ci } else { 646bf215546Sopenharmony_ci assert(lds_size <= 32768); 647bf215546Sopenharmony_ci lds_size = align(lds_size, 256) / 256; 648bf215546Sopenharmony_ci } 649bf215546Sopenharmony_ci 650bf215546Sopenharmony_ci return lds_size; 651bf215546Sopenharmony_ci} 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_cistatic inline unsigned 654bf215546Sopenharmony_ciget_tcs_num_patches(unsigned tcs_num_input_vertices, unsigned tcs_num_output_vertices, 655bf215546Sopenharmony_ci unsigned tcs_num_inputs, unsigned tcs_num_outputs, 656bf215546Sopenharmony_ci unsigned tcs_num_patch_outputs, unsigned tess_offchip_block_dw_size, 657bf215546Sopenharmony_ci enum amd_gfx_level gfx_level, enum radeon_family family) 658bf215546Sopenharmony_ci{ 659bf215546Sopenharmony_ci uint32_t input_vertex_size = tcs_num_inputs * 16; 660bf215546Sopenharmony_ci uint32_t input_patch_size = tcs_num_input_vertices * input_vertex_size; 661bf215546Sopenharmony_ci uint32_t output_vertex_size = tcs_num_outputs * 16; 662bf215546Sopenharmony_ci uint32_t pervertex_output_patch_size = tcs_num_output_vertices * output_vertex_size; 663bf215546Sopenharmony_ci uint32_t output_patch_size = pervertex_output_patch_size + tcs_num_patch_outputs * 16; 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci /* Ensure that we only need one wave per SIMD so we don't need to check 666bf215546Sopenharmony_ci * resource usage. Also ensures that the number of tcs in and out 667bf215546Sopenharmony_ci * vertices per threadgroup are at most 256. 668bf215546Sopenharmony_ci */ 669bf215546Sopenharmony_ci unsigned num_patches = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices) * 4; 670bf215546Sopenharmony_ci /* Make sure that the data fits in LDS. This assumes the shaders only 671bf215546Sopenharmony_ci * use LDS for the inputs and outputs. 672bf215546Sopenharmony_ci */ 673bf215546Sopenharmony_ci unsigned hardware_lds_size = 32768; 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci /* Looks like STONEY hangs if we use more than 32 KiB LDS in a single 676bf215546Sopenharmony_ci * threadgroup, even though there is more than 32 KiB LDS. 677bf215546Sopenharmony_ci * 678bf215546Sopenharmony_ci * Test: dEQP-VK.tessellation.shader_input_output.barrier 679bf215546Sopenharmony_ci */ 680bf215546Sopenharmony_ci if (gfx_level >= GFX7 && family != CHIP_STONEY) 681bf215546Sopenharmony_ci hardware_lds_size = 65536; 682bf215546Sopenharmony_ci 683bf215546Sopenharmony_ci if (input_patch_size + output_patch_size) 684bf215546Sopenharmony_ci num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size)); 685bf215546Sopenharmony_ci /* Make sure the output data fits in the offchip buffer */ 686bf215546Sopenharmony_ci if (output_patch_size) 687bf215546Sopenharmony_ci num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size); 688bf215546Sopenharmony_ci /* Not necessary for correctness, but improves performance. The 689bf215546Sopenharmony_ci * specific value is taken from the proprietary driver. 690bf215546Sopenharmony_ci */ 691bf215546Sopenharmony_ci num_patches = MIN2(num_patches, 40); 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci /* GFX6 bug workaround - limit LS-HS threadgroups to only one wave. */ 694bf215546Sopenharmony_ci if (gfx_level == GFX6) { 695bf215546Sopenharmony_ci unsigned one_wave = 64 / MAX2(tcs_num_input_vertices, tcs_num_output_vertices); 696bf215546Sopenharmony_ci num_patches = MIN2(num_patches, one_wave); 697bf215546Sopenharmony_ci } 698bf215546Sopenharmony_ci return num_patches; 699bf215546Sopenharmony_ci} 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_civoid radv_lower_io(struct radv_device *device, nir_shader *nir, bool is_mesh_shading); 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_cibool radv_lower_io_to_mem(struct radv_device *device, struct radv_pipeline_stage *stage, 704bf215546Sopenharmony_ci const struct radv_pipeline_key *pl_key); 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_civoid radv_lower_ngg(struct radv_device *device, struct radv_pipeline_stage *ngg_stage, 707bf215546Sopenharmony_ci const struct radv_pipeline_key *pl_key); 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_cibool radv_consider_culling(const struct radv_physical_device *pdevice, struct nir_shader *nir, 710bf215546Sopenharmony_ci uint64_t ps_inputs_read, unsigned num_vertices_per_primitive, 711bf215546Sopenharmony_ci const struct radv_shader_info *info); 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_civoid radv_get_nir_options(struct radv_physical_device *device); 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_cibool radv_force_primitive_shading_rate(nir_shader *nir, struct radv_device *device); 716bf215546Sopenharmony_ci 717bf215546Sopenharmony_cibool radv_lower_fs_intrinsics(nir_shader *nir, const struct radv_pipeline_stage *fs_stage, 718bf215546Sopenharmony_ci const struct radv_pipeline_key *key); 719bf215546Sopenharmony_ci 720bf215546Sopenharmony_ci#endif 721