Lines Matching defs:shader
44 static void si_dump_shader_key(const struct si_shader *shader, FILE *f);
58 /** Whether the shader runs as a combination of multiple API shaders */
59 bool si_is_multi_part_shader(struct si_shader *shader)
61 if (shader->selector->screen->info.gfx_level <= GFX8 ||
62 shader->selector->stage > MESA_SHADER_GEOMETRY)
65 return shader->key.ge.as_ls || shader->key.ge.as_es ||
66 shader->selector->stage == MESA_SHADER_TESS_CTRL ||
67 shader->selector->stage == MESA_SHADER_GEOMETRY;
70 /** Whether the shader runs on a merged HW stage (LSHS or ESGS) */
71 bool si_is_merged_shader(struct si_shader *shader)
73 if (shader->selector->stage > MESA_SHADER_GEOMETRY)
76 return shader->key.ge.as_ngg || si_is_multi_part_shader(shader);
111 /* Since some shader stages use the highest used IO index
225 unsigned si_get_max_workgroup_size(const struct si_shader *shader)
227 switch (shader->selector->stage) {
230 return shader->key.ge.as_ngg ? 128 : 0;
235 return shader->selector->screen->info.gfx_level >= GFX7 ? 128 : 0;
238 return shader->selector->screen->info.gfx_level >= GFX9 ? 128 : 0;
248 if (shader->selector->info.base.workgroup_size_variable)
251 uint16_t *local_size = shader->selector->info.base.workgroup_size;
263 if (ctx->shader->selector->info.base.num_ubos == 1 &&
264 ctx->shader->selector->info.base.num_ssbos == 0)
297 unsigned num_vbos_in_user_sgprs = ctx->shader->selector->info.num_vbos_in_user_sgprs;
301 if (si_is_merged_shader(ctx->shader))
317 struct si_shader *shader = ctx->shader;
320 if (shader->key.ge.as_ls) {
345 if (!shader->is_gs_copy_shader) {
347 if (shader->selector->info.num_inputs) {
349 for (unsigned i = 1; i < shader->selector->info.num_inputs; i++)
352 *num_prolog_vgprs += shader->selector->info.num_inputs;
387 /* Convenient merged shader definitions. */
401 struct si_shader *shader = ctx->shader;
410 if (shader->key.ge.as_ls || stage == MESA_SHADER_TESS_CTRL)
412 else if (shader->key.ge.as_es || shader->key.ge.as_ngg || stage == MESA_SHADER_GEOMETRY)
420 if (shader->selector->info.base.vs.blit_sgprs_amd) {
421 declare_vs_blit_inputs(ctx, shader->selector->info.base.vs.blit_sgprs_amd);
431 if (ctx->shader->is_gs_copy_shader) {
443 if (shader->key.ge.as_es) {
445 } else if (shader->key.ge.as_ls) {
513 /* LS return values are inputs to the TCS main shader part. */
520 if (shader->key.ge.opt.same_patch_vertices) {
521 unsigned num_outputs = util_last_bit64(shader->selector->info.outputs_written);
527 if (shader->key.ge.opt.same_patch_vertices) {
528 unsigned num_inputs = util_last_bit64(shader->previous_stage_sel->info.outputs_written);
552 if (ctx->shader->key.ge.as_ngg)
567 if (ctx->stage != MESA_SHADER_VERTEX || !shader->selector->info.base.vs.blit_sgprs_amd) {
572 if (ctx->stage == MESA_SHADER_VERTEX && shader->selector->info.base.vs.blit_sgprs_amd) {
573 declare_vs_blit_inputs(ctx, shader->selector->info.base.vs.blit_sgprs_amd);
615 if ((ctx->shader->key.ge.as_es || ngg_cull_shader) &&
620 /* For the NGG cull shader, add 1 SGPR to hold
625 if (shader->selector->info.num_vbos_in_user_sgprs) {
628 SI_SGPR_VS_VB_DESCRIPTOR_FIRST + shader->selector->info.num_vbos_in_user_sgprs * 4;
634 /* The NGG cull shader has to return all 9 VGPRs.
636 * The normal merged ESGS shader only has to return the 5 VGPRs
656 if (shader->key.ge.as_es) {
715 shader->info.face_vgpr_index = ctx->args.num_vgprs_used;
718 shader->info.ancillary_vgpr_index = ctx->args.num_vgprs_used;
721 shader->info.sample_coverage_vgpr_index = ctx->args.num_vgprs_used;
728 if (shader->selector->info.colors_read) {
729 unsigned num_color_elements = util_bitcount(shader->selector->info.colors_read);
739 num_returns = num_return_sgprs + util_bitcount(shader->selector->info.colors_written) * 4 +
740 shader->selector->info.writes_z + shader->selector->info.writes_stencil +
741 shader->selector->info.writes_samplemask + 1 /* SampleMaskIn */;
752 if (shader->selector->info.uses_grid_size)
754 if (shader->selector->info.uses_variable_block_size)
758 shader->selector->info.base.cs.user_data_components_amd;
765 for (unsigned i = 0; i < shader->selector->cs_num_shaderbufs_in_user_sgprs; i++) {
772 for (unsigned i = 0; i < shader->selector->cs_num_images_in_user_sgprs; i++) {
773 unsigned num_sgprs = BITSET_TEST(shader->selector->info.base.image_buffers, i) ? 4 : 8;
783 if (shader->selector->info.uses_block_id[i]) {
787 if (shader->selector->info.uses_subgroup_info)
799 assert(0 && "unimplemented shader");
803 shader->info.num_input_sgprs = ctx->args.num_sgprs_used;
804 shader->info.num_input_vgprs = ctx->args.num_vgprs_used;
806 assert(shader->info.num_input_vgprs >= num_prolog_vgprs);
807 shader->info.num_input_vgprs -= num_prolog_vgprs;
820 static bool si_shader_binary_open(struct si_screen *screen, struct si_shader *shader,
823 const struct si_shader_selector *sel = shader->selector;
835 add_part(shader->prolog);
836 add_part(shader->previous_stage);
837 add_part(shader);
838 add_part(shader->epilog);
845 if (sel && screen->info.gfx_level >= GFX9 && !shader->is_gs_copy_shader &&
847 (sel->stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg))) {
850 sym->size = shader->gs_info.esgs_ring_size * 4;
854 if (sel->stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg) {
857 sym->size = shader->ngg.ngg_emit_size * 4;
868 .wave_size = shader->wave_size,
877 shader->config.lds_size = DIV_ROUND_UP(rtld->lds_size, alloc_granularity);
883 static unsigned si_get_shader_binary_size(struct si_screen *screen, struct si_shader *shader)
886 si_shader_binary_open(screen, shader, &rtld);
915 bool si_shader_binary_upload(struct si_screen *sscreen, struct si_shader *shader,
919 if (!si_shader_binary_open(sscreen, shader, &binary))
922 si_resource_reference(&shader->bo, NULL);
923 shader->bo = si_aligned_buffer_create(
928 if (!shader->bo)
936 u.rx_va = shader->bo->gpu_address;
938 shader->bo->buf, NULL,
947 shader->binary.uploaded_code_size = size;
948 shader->binary.uploaded_code = malloc(size);
949 memcpy(shader->binary.uploaded_code, u.rx_ptr, size);
952 sscreen->ws->buffer_unmap(sscreen->ws, shader->bo->buf);
1018 static void si_calculate_max_simd_waves(struct si_shader *shader)
1020 struct si_screen *sscreen = shader->selector->screen;
1021 struct ac_shader_config *conf = &shader->config;
1022 unsigned num_inputs = shader->selector->info.num_inputs;
1023 unsigned lds_increment = get_lds_granularity(sscreen, shader->selector->stage);
1030 switch (shader->selector->stage) {
1045 unsigned max_workgroup_size = si_get_max_workgroup_size(shader);
1047 DIV_ROUND_UP(max_workgroup_size, shader->wave_size);
1061 * Wave32 and Wave64 with shader-db fairly. */
1070 shader->info.max_simd_waves = max_simd_waves;
1073 void si_shader_dump_stats_for_shader_db(struct si_screen *screen, struct si_shader *shader,
1076 const struct ac_shader_config *conf = &shader->config;
1080 si_shader_dump_disassembly(screen, &shader->binary, shader->selector->stage,
1081 shader->wave_size, debug, "main", NULL);
1088 conf->num_sgprs, conf->num_vgprs, si_get_shader_binary_size(screen, shader),
1089 conf->lds_size, conf->scratch_bytes_per_wave, shader->info.max_simd_waves,
1090 conf->spilled_sgprs, conf->spilled_vgprs, shader->info.private_mem_vgprs,
1091 shader->selector->info.has_divergent_loop,
1092 shader->selector->info.base.num_inlinable_uniforms,
1093 shader->info.nr_param_exports,
1094 stages[shader->selector->stage], shader->wave_size);
1097 static void si_shader_dump_stats(struct si_screen *sscreen, struct si_shader *shader, FILE *file,
1100 const struct ac_shader_config *conf = &shader->config;
1102 if (!check_debug_option || si_can_dump_shader(sscreen, shader->selector->stage)) {
1103 if (shader->selector->stage == MESA_SHADER_FRAGMENT) {
1124 shader->info.private_mem_vgprs, si_get_shader_binary_size(sscreen, shader),
1125 conf->lds_size * get_lds_granularity(sscreen, shader->selector->stage),
1126 conf->scratch_bytes_per_wave, shader->info.max_simd_waves);
1130 const char *si_get_shader_name(const struct si_shader *shader)
1132 switch (shader->selector->stage) {
1134 if (shader->key.ge.as_es)
1136 else if (shader->key.ge.as_ls)
1138 else if (shader->key.ge.as_ngg)
1145 if (shader->key.ge.as_es)
1147 else if (shader->key.ge.as_ngg)
1152 if (shader->is_gs_copy_shader)
1165 void si_shader_dump(struct si_screen *sscreen, struct si_shader *shader,
1168 gl_shader_stage stage = shader->selector->stage;
1171 si_dump_shader_key(shader, file);
1173 if (!check_debug_option && shader->binary.llvm_ir_string) {
1174 if (shader->previous_stage && shader->previous_stage->binary.llvm_ir_string) {
1175 fprintf(file, "\n%s - previous stage - LLVM IR:\n\n", si_get_shader_name(shader));
1176 fprintf(file, "%s\n", shader->previous_stage->binary.llvm_ir_string);
1179 fprintf(file, "\n%s - main shader part - LLVM IR:\n\n", si_get_shader_name(shader));
1180 fprintf(file, "%s\n", shader->binary.llvm_ir_string);
1186 fprintf(file, "\n%s:\n", si_get_shader_name(shader));
1188 if (shader->prolog)
1189 si_shader_dump_disassembly(sscreen, &shader->prolog->binary, stage, shader->wave_size, debug,
1191 if (shader->previous_stage)
1192 si_shader_dump_disassembly(sscreen, &shader->previous_stage->binary, stage,
1193 shader->wave_size, debug, "previous stage", file);
1194 si_shader_dump_disassembly(sscreen, &shader->binary, stage, shader->wave_size, debug, "main",
1197 if (shader->epilog)
1198 si_shader_dump_disassembly(sscreen, &shader->epilog->binary, stage, shader->wave_size, debug,
1203 si_shader_dump_stats(sscreen, shader, file, check_debug_option);
1230 static void si_dump_shader_key(const struct si_shader *shader, FILE *f)
1232 const union si_shader_key *key = &shader->key;
1233 gl_shader_stage stage = shader->selector->stage;
1237 _mesa_sha1_print(f, shader->selector->info.base.source_sha1);
1250 if (shader->selector->screen->info.gfx_level >= GFX9) {
1265 if (shader->is_gs_copy_shader)
1268 if (shader->selector->screen->info.gfx_level >= GFX9 &&
1375 * build the VS prolog function, and set shader->info bits where needed.
1377 * \param info Shader info of the vertex shader.
1378 * \param num_input_sgprs Number of input SGPRs for the vertex shader.
1379 * \param has_old_ Whether the preceding shader part is the NGG cull shader.
1381 * \param shader_out The vertex shader, or the next shader if merging LS+HS or ES+GS.
1382 * \param key Output shader part key.
1515 static bool si_lower_io_to_mem(struct si_shader *shader, nir_shader *nir,
1518 struct si_shader_selector *sel = shader->selector;
1519 const union si_shader_key *key = &shader->key;
1540 shader->wave_size,
1575 struct nir_shader *si_get_nir_shader(struct si_shader *shader, bool *free_nir,
1578 struct si_shader_selector *sel = shader->selector;
1579 const union si_shader_key *key = &shader->key;
1597 /* Modify the shader's name so that each variant gets its own name. */
1606 /* Kill outputs according to the shader key. */
1618 /* Most places use shader information from the default variant, not
1623 * from the shader code:
1632 * TODO: These are things the driver ignores in the final shader code
1633 * and relies on the default shader info.
1644 * on current states, so we don't care about the shader code.
1651 * don't have shader variants.
1653 * TODO: The driver uses a linear search to find a shader variant. This
1673 * get turned into PC-relative loads from a data section next to the shader.
1690 bool opt_offsets = si_lower_io_to_mem(shader, nir, tcs_vgpr_only_inputs);
1722 void si_update_shader_binary_info(struct si_shader *shader, nir_shader *nir)
1725 si_nir_scan_shader(shader->selector->screen, nir, &info);
1727 shader->info.uses_vmem_load_other |= info.uses_vmem_load_other;
1728 shader->info.uses_vmem_sampler_or_bvh |= info.uses_vmem_sampler_or_bvh;
1778 struct si_shader *shader, struct util_debug_callback *debug)
1780 struct si_shader_selector *sel = shader->selector;
1782 struct nir_shader *nir = si_get_nir_shader(shader, &free_nir, 0);
1787 (sel->stage == MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg)) &&
1788 !shader->key.ge.as_ls && !shader->key.ge.as_es) {
1790 shader->info.nr_param_exports = 0;
1791 shader->info.vs_output_param_mask = 0;
1793 STATIC_ASSERT(sizeof(shader->info.vs_output_param_offset[0]) == 1);
1794 memset(shader->info.vs_output_param_offset, AC_EXP_PARAM_DEFAULT_VAL_0000,
1795 sizeof(shader->info.vs_output_param_offset));
1797 /* A slot remapping table for duplicated outputs, so that 1 vertex shader output can be
1798 * mapped to multiple fragment shader inputs.
1806 shader->info.vs_output_param_offset);
1809 /* TODO: Use this for the GS copy shader too. */
1810 si_nir_assign_param_offsets(nir, &sel->info, slot_remap, &shader->info.nr_param_exports,
1811 &shader->info.vs_output_param_mask,
1812 shader->info.vs_output_param_offset);
1814 if (shader->key.ge.mono.u.vs_export_prim_id) {
1815 shader->info.vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = shader->info.nr_param_exports++;
1816 shader->info.vs_output_param_mask |= BITFIELD64_BIT(sel->info.num_outputs);
1821 if (si_shader_uses_streamout(shader))
1833 for (unsigned i = 0; i < ARRAY_SIZE(shader->info.vs_output_ps_input_cntl); i++)
1834 shader->info.vs_output_ps_input_cntl[i] = SI_PS_INPUT_CNTL_UNUSED;
1835 shader->info.vs_output_ps_input_cntl[VARYING_SLOT_COL0] = SI_PS_INPUT_CNTL_UNUSED_COLOR0;
1837 si_update_shader_binary_info(shader, nir);
1839 shader->info.uses_instanceid = sel->info.uses_instanceid;
1840 shader->info.private_mem_vgprs = DIV_ROUND_UP(nir->scratch_size, 4);
1873 if (!si_llvm_compile_shader(sscreen, compiler, shader, &so, debug, nir, free_nir))
1876 shader->config.float_mode = float_mode;
1878 /* The GS copy shader is compiled next. */
1879 if (sel->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg) {
1880 shader->gs_copy_shader = si_generate_gs_copy_shader(sscreen, compiler, sel, &so, debug);
1881 if (!shader->gs_copy_shader) {
1882 fprintf(stderr, "radeonsi: can't create GS copy shader\n");
1891 !shader->key.ge.as_ls && !shader->key.ge.as_es) {
1892 ubyte *vs_output_param_offset = shader->info.vs_output_param_offset;
1894 if (sel->stage == MESA_SHADER_GEOMETRY && !shader->key.ge.as_ngg)
1895 vs_output_param_offset = shader->gs_copy_shader->info.vs_output_param_offset;
1897 /* We must use the original shader info before the removal of duplicated shader outputs. */
1900 shader->key.ge.mono.u.vs_export_prim_id;
1921 shader->info.vs_output_ps_input_cntl[semantic] = ps_input_cntl;
1928 sscreen->info.num_physical_wave64_vgprs_per_simd * (shader->wave_size == 32 ? 2 : 1);
1932 unsigned threads_per_tg = si_get_max_workgroup_size(shader);
1933 unsigned waves_per_tg = DIV_ROUND_UP(threads_per_tg, shader->wave_size);
1939 if (shader->config.num_sgprs > max_sgprs || shader->config.num_vgprs > max_vgprs) {
1941 "LLVM failed to compile a shader correctly: "
1943 shader->config.num_sgprs, shader->config.num_vgprs, max_sgprs, max_vgprs);
1947 * the env var to allow shader-db to work.
1956 shader->config.scratch_bytes_per_wave && !si_is_merged_shader(shader))
1957 shader->info.num_input_sgprs += 1; /* scratch byte offset */
1961 shader->info.num_input_vgprs = ac_get_fs_input_vgpr_cnt(
1962 &shader->config, &shader->info.face_vgpr_index, &shader->info.ancillary_vgpr_index,
1963 &shader->info.sample_coverage_vgpr_index);
1966 si_calculate_max_simd_waves(shader);
1967 si_shader_dump_stats_for_shader_db(sscreen, shader, debug);
1972 * Create, compile and return a shader part (prolog or epilog).
1975 * \param list list of shader parts of the same category
1976 * \param type shader type
1977 * \param key shader part key
2010 struct si_shader shader = {};
2011 shader.selector = &sel;
2016 shader.key.ge.as_ls = key->vs_prolog.as_ls;
2017 shader.key.ge.as_es = key->vs_prolog.as_es;
2018 shader.key.ge.as_ngg = key->vs_prolog.as_ngg;
2023 shader.key.ge.part.tcs.epilog = key->tcs_epilog.states;
2028 shader.key.ps.part.prolog = key->ps_prolog.states;
2031 shader.key.ps.part.epilog = key->ps_epilog.states;
2036 unreachable("bad shader part");
2042 ctx.shader = &shader;
2067 struct si_shader *shader, struct util_debug_callback *debug,
2072 if (!si_vs_needs_prolog(vs, key, &shader->key, false,
2073 shader->selector->stage == MESA_SHADER_GEOMETRY))
2078 si_get_vs_prolog_key(&vs->info, main_part->info.num_input_sgprs, false, key, shader,
2081 shader->prolog =
2084 return shader->prolog != NULL;
2088 * Select and compile (or reuse) vertex shader parts (prolog & epilog).
2091 struct si_shader *shader, struct util_debug_callback *debug)
2093 return si_get_vs_prolog(sscreen, compiler, shader, debug, shader, &shader->key.ge.part.vs.prolog);
2096 void si_get_tcs_epilog_key(struct si_shader *shader, union si_shader_part_key *key)
2099 key->tcs_epilog.wave32 = shader->wave_size == 32;
2100 key->tcs_epilog.states = shader->key.ge.part.tcs.epilog;
2104 shader->wave_size % shader->selector->info.base.tess.tcs_vertices_out == 0;
2111 struct si_shader *shader, struct util_debug_callback *debug)
2114 struct si_shader *ls_main_part = shader->key.ge.part.tcs.ls->main_shader_part_ls;
2116 if (!si_get_vs_prolog(sscreen, compiler, shader, debug, ls_main_part,
2117 &shader->key.ge.part.tcs.ls_prolog))
2120 shader->previous_stage = ls_main_part;
2125 si_get_tcs_epilog_key(shader, &epilog_key);
2127 shader->epilog = si_get_shader_part(sscreen, &sscreen->tcs_epilogs, MESA_SHADER_TESS_CTRL, false,
2130 return shader->epilog != NULL;
2137 struct si_shader *shader, struct util_debug_callback *debug)
2142 if (shader->key.ge.as_ngg)
2143 es_main_part = shader->key.ge.part.gs.es->main_shader_part_ngg_es;
2145 es_main_part = shader->key.ge.part.gs.es->main_shader_part_es;
2147 if (shader->key.ge.part.gs.es->stage == MESA_SHADER_VERTEX &&
2148 !si_get_vs_prolog(sscreen, compiler, shader, debug, es_main_part,
2149 &shader->key.ge.part.gs.vs_prolog))
2152 shader->previous_stage = es_main_part;
2160 * build the PS prolog function, and set related bits in shader->config.
2162 void si_get_ps_prolog_key(struct si_shader *shader, union si_shader_part_key *key,
2165 struct si_shader_info *info = &shader->selector->info;
2168 key->ps_prolog.states = shader->key.ps.part.prolog;
2169 key->ps_prolog.wave32 = shader->wave_size == 32;
2171 key->ps_prolog.num_input_sgprs = shader->info.num_input_sgprs;
2172 key->ps_prolog.num_input_vgprs = shader->info.num_input_vgprs;
2180 key->ps_prolog.ancillary_vgpr_index = shader->info.ancillary_vgpr_index;
2181 key->ps_prolog.sample_coverage_vgpr_index = shader->info.sample_coverage_vgpr_index;
2183 if (shader->key.ps.part.prolog.poly_stipple)
2184 shader->info.uses_vmem_load_other = true;
2187 ubyte *color = shader->selector->info.color_attr_index;
2189 if (shader->key.ps.part.prolog.color_two_side) {
2192 key->ps_prolog.face_vgpr_index = shader->info.face_vgpr_index;
2194 shader->config.spi_ps_input_ena |= S_0286CC_FRONT_FACE_ENA(1);
2206 if (shader->key.ps.part.prolog.flatshade_colors && interp == INTERP_MODE_COLOR)
2216 if (shader->key.ps.part.prolog.force_persp_sample_interp)
2218 if (shader->key.ps.part.prolog.force_persp_center_interp)
2225 shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
2231 shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
2237 shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTROID_ENA(1);
2246 if (shader->key.ps.part.prolog.force_linear_sample_interp)
2248 if (shader->key.ps.part.prolog.force_linear_center_interp)
2253 * main shader and PERSP_PULL_MODEL is never used.
2259 shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
2265 shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
2271 shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTROID_ENA(1);
2303 void si_get_ps_epilog_key(struct si_shader *shader, union si_shader_part_key *key)
2305 struct si_shader_info *info = &shader->selector->info;
2307 key->ps_epilog.wave32 = shader->wave_size == 32;
2308 key->ps_epilog.uses_discard = si_shader_uses_discard(shader);
2314 key->ps_epilog.states = shader->key.ps.part.epilog;
2318 * Select and compile (or reuse) pixel shader parts (prolog & epilog).
2321 struct si_shader *shader, struct util_debug_callback *debug)
2327 si_get_ps_prolog_key(shader, &prolog_key, true);
2331 shader->prolog =
2334 if (!shader->prolog)
2339 si_get_ps_epilog_key(shader, &epilog_key);
2341 shader->epilog =
2344 if (!shader->epilog)
2348 if (shader->key.ps.part.prolog.poly_stipple) {
2349 shader->config.spi_ps_input_ena |= S_0286CC_POS_FIXED_PT_ENA(1);
2350 assert(G_0286CC_POS_FIXED_PT_ENA(shader->config.spi_ps_input_addr));
2354 if (shader->key.ps.part.prolog.force_persp_sample_interp &&
2355 (G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_ena) ||
2356 G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
2357 shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTER_ENA;
2358 shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
2359 shader->config.spi_ps_input_ena |= S_0286CC_PERSP_SAMPLE_ENA(1);
2361 if (shader->key.ps.part.prolog.force_linear_sample_interp &&
2362 (G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_ena) ||
2363 G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
2364 shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTER_ENA;
2365 shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
2366 shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_SAMPLE_ENA(1);
2368 if (shader->key.ps.part.prolog.force_persp_center_interp &&
2369 (G_0286CC_PERSP_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
2370 G_0286CC_PERSP_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
2371 shader->config.spi_ps_input_ena &= C_0286CC_PERSP_SAMPLE_ENA;
2372 shader->config.spi_ps_input_ena &= C_0286CC_PERSP_CENTROID_ENA;
2373 shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
2375 if (shader->key.ps.part.prolog.force_linear_center_interp &&
2376 (G_0286CC_LINEAR_SAMPLE_ENA(shader->config.spi_ps_input_ena) ||
2377 G_0286CC_LINEAR_CENTROID_ENA(shader->config.spi_ps_input_ena))) {
2378 shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_SAMPLE_ENA;
2379 shader->config.spi_ps_input_ena &= C_0286CC_LINEAR_CENTROID_ENA;
2380 shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
2384 if (G_0286CC_POS_W_FLOAT_ENA(shader->config.spi_ps_input_ena) &&
2385 !(shader->config.spi_ps_input_ena & 0xf)) {
2386 shader->config.spi_ps_input_ena |= S_0286CC_PERSP_CENTER_ENA(1);
2387 assert(G_0286CC_PERSP_CENTER_ENA(shader->config.spi_ps_input_addr));
2391 if (!(shader->config.spi_ps_input_ena & 0x7f)) {
2392 shader->config.spi_ps_input_ena |= S_0286CC_LINEAR_CENTER_ENA(1);
2393 assert(G_0286CC_LINEAR_CENTER_ENA(shader->config.spi_ps_input_addr));
2397 if (shader->key.ps.part.prolog.samplemask_log_ps_iter) {
2398 shader->config.spi_ps_input_ena |= S_0286CC_ANCILLARY_ENA(1);
2399 assert(G_0286CC_ANCILLARY_ENA(shader->config.spi_ps_input_addr));
2420 void si_fix_resource_usage(struct si_screen *sscreen, struct si_shader *shader)
2422 unsigned min_sgprs = shader->info.num_input_sgprs + 2; /* VCC */
2424 shader->config.num_sgprs = MAX2(shader->config.num_sgprs, min_sgprs);
2426 if (shader->selector->stage == MESA_SHADER_COMPUTE &&
2427 si_get_max_workgroup_size(shader) > shader->wave_size) {
2428 si_multiwave_lds_size_workaround(sscreen, &shader->config.lds_size);
2433 struct si_shader *shader, struct util_debug_callback *debug)
2435 struct si_shader_selector *sel = shader->selector;
2436 struct si_shader *mainp = *si_get_main_shader_part(sel, &shader->key);
2447 if (shader->is_monolithic) {
2448 /* Monolithic shader (compiled as a whole, has many variants,
2451 if (!si_compile_shader(sscreen, compiler, shader, debug))
2454 /* The shader consists of several parts:
2456 * - the middle part is the user shader, it has 1 variant only
2457 * and it was compiled during the creation of the shader
2465 * shaders also contain the prolog and user shader parts of
2466 * the previous shader stage.
2472 /* Copy the compiled shader data over. */
2473 shader->is_binary_shared = true;
2474 shader->binary = mainp->binary;
2475 shader->config = mainp->config;
2476 shader->info = mainp->info;
2481 if (!si_shader_select_vs_parts(sscreen, compiler, shader, debug))
2485 if (!si_shader_select_tcs_parts(sscreen, compiler, shader, debug))
2491 if (!si_shader_select_gs_parts(sscreen, compiler, shader, debug))
2494 /* Clone the GS copy shader for the shader variant.
2499 if (!shader->key.ge.as_ngg) {
2506 shader->gs_copy_shader = CALLOC_STRUCT(si_shader);
2507 memcpy(shader->gs_copy_shader, sel->main_shader_part->gs_copy_shader,
2508 sizeof(*shader->gs_copy_shader));
2510 pipe_reference(NULL, &shader->gs_copy_shader->bo->b.b.reference);
2512 shader->gs_copy_shader->shader_log = NULL;
2513 shader->gs_copy_shader->is_binary_shared = true;
2514 util_queue_fence_init(&shader->gs_copy_shader->ready);
2518 if (!si_shader_select_ps_parts(sscreen, compiler, shader, debug))
2524 shader->config.num_vgprs = MAX2(shader->config.num_vgprs, shader->info.num_input_vgprs);
2529 assert(shader->wave_size == mainp->wave_size);
2530 assert(!shader->previous_stage || shader->wave_size == shader->previous_stage->wave_size);
2533 if (shader->prolog) {
2534 shader->config.num_sgprs =
2535 MAX2(shader->config.num_sgprs, shader->prolog->config.num_sgprs);
2536 shader->config.num_vgprs =
2537 MAX2(shader->config.num_vgprs, shader->prolog->config.num_vgprs);
2539 if (shader->previous_stage) {
2540 shader->config.num_sgprs =
2541 MAX2(shader->config.num_sgprs, shader->previous_stage->config.num_sgprs);
2542 shader->config.num_vgprs =
2543 MAX2(shader->config.num_vgprs, shader->previous_stage->config.num_vgprs);
2544 shader->config.spilled_sgprs =
2545 MAX2(shader->config.spilled_sgprs, shader->previous_stage->config.spilled_sgprs);
2546 shader->config.spilled_vgprs =
2547 MAX2(shader->config.spilled_vgprs, shader->previous_stage->config.spilled_vgprs);
2548 shader->info.private_mem_vgprs =
2549 MAX2(shader->info.private_mem_vgprs, shader->previous_stage->info.private_mem_vgprs);
2550 shader->config.scratch_bytes_per_wave =
2551 MAX2(shader->config.scratch_bytes_per_wave,
2552 shader->previous_stage->config.scratch_bytes_per_wave);
2553 shader->info.uses_instanceid |= shader->previous_stage->info.uses_instanceid;
2554 shader->info.uses_vmem_load_other |= shader->previous_stage->info.uses_vmem_load_other;
2555 shader->info.uses_vmem_sampler_or_bvh |= shader->previous_stage->info.uses_vmem_sampler_or_bvh;
2557 if (shader->epilog) {
2558 shader->config.num_sgprs =
2559 MAX2(shader->config.num_sgprs, shader->epilog->config.num_sgprs);
2560 shader->config.num_vgprs =
2561 MAX2(shader->config.num_vgprs, shader->epilog->config.num_vgprs);
2563 si_calculate_max_simd_waves(shader);
2566 if (sel->stage <= MESA_SHADER_GEOMETRY && shader->key.ge.as_ngg) {
2567 assert(!shader->key.ge.as_es && !shader->key.ge.as_ls);
2568 if (!gfx10_ngg_calculate_subgroup_info(shader)) {
2573 gfx9_get_gs_info(shader->previous_stage_sel, sel, &shader->gs_info);
2576 shader->uses_vs_state_provoking_vertex =
2583 shader->key.ge.mono.u.vs_export_prim_id));
2585 shader->uses_gs_state_outprim = sscreen->use_ngg &&
2588 si_shader_uses_streamout(shader);
2591 shader->uses_base_instance = sel->info.uses_base_instance ||
2592 shader->key.ge.part.vs.prolog.instance_divisor_is_one ||
2593 shader->key.ge.part.vs.prolog.instance_divisor_is_fetched;
2595 shader->uses_base_instance = shader->previous_stage_sel &&
2596 (shader->previous_stage_sel->info.uses_base_instance ||
2597 shader->key.ge.part.tcs.ls_prolog.instance_divisor_is_one ||
2598 shader->key.ge.part.tcs.ls_prolog.instance_divisor_is_fetched);
2600 shader->uses_base_instance = shader->previous_stage_sel &&
2601 (shader->previous_stage_sel->info.uses_base_instance ||
2602 shader->key.ge.part.gs.vs_prolog.instance_divisor_is_one ||
2603 shader->key.ge.part.gs.vs_prolog.instance_divisor_is_fetched);
2606 si_fix_resource_usage(sscreen, shader);
2609 bool ok = si_shader_binary_upload(sscreen, shader, 0);
2610 si_shader_dump(sscreen, shader, debug, stderr, true);
2613 fprintf(stderr, "LLVM failed to upload shader\n");
2630 void si_shader_destroy(struct si_shader *shader)
2632 if (shader->scratch_bo)
2633 si_resource_reference(&shader->scratch_bo, NULL);
2635 si_resource_reference(&shader->bo, NULL);
2637 if (!shader->is_binary_shared)
2638 si_shader_binary_clean(&shader->binary);
2640 free(shader->shader_log);