1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © Microsoft Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "d3d12_compiler.h" 25bf215546Sopenharmony_ci#include "d3d12_context.h" 26bf215546Sopenharmony_ci#include "d3d12_debug.h" 27bf215546Sopenharmony_ci#include "d3d12_screen.h" 28bf215546Sopenharmony_ci#include "d3d12_nir_passes.h" 29bf215546Sopenharmony_ci#include "nir_to_dxil.h" 30bf215546Sopenharmony_ci#include "dxil_nir.h" 31bf215546Sopenharmony_ci#include "dxil_nir_lower_int_cubemaps.h" 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_ci#include "pipe/p_state.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_ci#include "nir.h" 36bf215546Sopenharmony_ci#include "nir/nir_draw_helpers.h" 37bf215546Sopenharmony_ci#include "nir/tgsi_to_nir.h" 38bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 39bf215546Sopenharmony_ci#include "tgsi/tgsi_from_mesa.h" 40bf215546Sopenharmony_ci#include "tgsi/tgsi_ureg.h" 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci#include "util/hash_table.h" 43bf215546Sopenharmony_ci#include "util/u_memory.h" 44bf215546Sopenharmony_ci#include "util/u_prim.h" 45bf215546Sopenharmony_ci#include "util/u_simple_shaders.h" 46bf215546Sopenharmony_ci#include "util/u_dl.h" 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci#include <dxguids/dxguids.h> 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ciextern "C" { 51bf215546Sopenharmony_ci#include "tgsi/tgsi_parse.h" 52bf215546Sopenharmony_ci#include "tgsi/tgsi_point_sprite.h" 53bf215546Sopenharmony_ci} 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_ci#ifdef _WIN32 56bf215546Sopenharmony_ci#include "dxil_validator.h" 57bf215546Sopenharmony_ci#endif 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ciconst void * 60bf215546Sopenharmony_cid3d12_get_compiler_options(struct pipe_screen *screen, 61bf215546Sopenharmony_ci enum pipe_shader_ir ir, 62bf215546Sopenharmony_ci enum pipe_shader_type shader) 63bf215546Sopenharmony_ci{ 64bf215546Sopenharmony_ci assert(ir == PIPE_SHADER_IR_NIR); 65bf215546Sopenharmony_ci return &d3d12_screen(screen)->nir_options; 66bf215546Sopenharmony_ci} 67bf215546Sopenharmony_ci 68bf215546Sopenharmony_cistatic uint32_t 69bf215546Sopenharmony_ciresource_dimension(enum glsl_sampler_dim dim) 70bf215546Sopenharmony_ci{ 71bf215546Sopenharmony_ci switch (dim) { 72bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_1D: 73bf215546Sopenharmony_ci return RESOURCE_DIMENSION_TEXTURE1D; 74bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_2D: 75bf215546Sopenharmony_ci return RESOURCE_DIMENSION_TEXTURE2D; 76bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_3D: 77bf215546Sopenharmony_ci return RESOURCE_DIMENSION_TEXTURE3D; 78bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_CUBE: 79bf215546Sopenharmony_ci return RESOURCE_DIMENSION_TEXTURECUBE; 80bf215546Sopenharmony_ci default: 81bf215546Sopenharmony_ci return RESOURCE_DIMENSION_UNKNOWN; 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci} 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_cistatic bool 86bf215546Sopenharmony_cican_remove_dead_sampler(nir_variable *var, void *data) 87bf215546Sopenharmony_ci{ 88bf215546Sopenharmony_ci const struct glsl_type *base_type = glsl_without_array(var->type); 89bf215546Sopenharmony_ci return glsl_type_is_sampler(base_type) && !glsl_type_is_bare_sampler(base_type); 90bf215546Sopenharmony_ci} 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_cistatic struct d3d12_shader * 93bf215546Sopenharmony_cicompile_nir(struct d3d12_context *ctx, struct d3d12_shader_selector *sel, 94bf215546Sopenharmony_ci struct d3d12_shader_key *key, struct nir_shader *nir) 95bf215546Sopenharmony_ci{ 96bf215546Sopenharmony_ci struct d3d12_screen *screen = d3d12_screen(ctx->base.screen); 97bf215546Sopenharmony_ci struct d3d12_shader *shader = rzalloc(sel, d3d12_shader); 98bf215546Sopenharmony_ci shader->key = *key; 99bf215546Sopenharmony_ci shader->nir = nir; 100bf215546Sopenharmony_ci sel->current = shader; 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_samplers); 103bf215546Sopenharmony_ci NIR_PASS_V(nir, dxil_nir_split_typed_samplers); 104bf215546Sopenharmony_ci 105bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_opt_dce); 106bf215546Sopenharmony_ci struct nir_remove_dead_variables_options dead_var_opts = {}; 107bf215546Sopenharmony_ci dead_var_opts.can_remove_var = can_remove_dead_sampler; 108bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, &dead_var_opts); 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_ci if (key->samples_int_textures) 111bf215546Sopenharmony_ci NIR_PASS_V(nir, dxil_lower_sample_to_txf_for_integer_tex, 112bf215546Sopenharmony_ci key->tex_wrap_states, key->swizzle_state, 113bf215546Sopenharmony_ci screen->base.get_paramf(&screen->base, PIPE_CAPF_MAX_TEXTURE_LOD_BIAS)); 114bf215546Sopenharmony_ci 115bf215546Sopenharmony_ci if (key->vs.needs_format_emulation) 116bf215546Sopenharmony_ci dxil_nir_lower_vs_vertex_conversion(nir, key->vs.format_conversion); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci uint32_t num_ubos_before_lower_to_ubo = nir->info.num_ubos; 119bf215546Sopenharmony_ci uint32_t num_uniforms_before_lower_to_ubo = nir->num_uniforms; 120bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_uniforms_to_ubo, false, false); 121bf215546Sopenharmony_ci shader->has_default_ubo0 = num_uniforms_before_lower_to_ubo > 0 && 122bf215546Sopenharmony_ci nir->info.num_ubos > num_ubos_before_lower_to_ubo; 123bf215546Sopenharmony_ci 124bf215546Sopenharmony_ci if (key->last_vertex_processing_stage) { 125bf215546Sopenharmony_ci if (key->invert_depth) 126bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_nir_invert_depth, key->invert_depth, key->halfz); 127bf215546Sopenharmony_ci if (!key->halfz) 128bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_clip_halfz); 129bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_lower_yflip); 130bf215546Sopenharmony_ci } 131bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_packed_ubo_loads); 132bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_lower_load_draw_params); 133bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_lower_load_patch_vertices_in); 134bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_lower_state_vars, shader); 135bf215546Sopenharmony_ci NIR_PASS_V(nir, dxil_nir_lower_bool_input); 136bf215546Sopenharmony_ci NIR_PASS_V(nir, dxil_nir_lower_loads_stores_to_dxil); 137bf215546Sopenharmony_ci NIR_PASS_V(nir, dxil_nir_lower_atomics_to_dxil); 138bf215546Sopenharmony_ci NIR_PASS_V(nir, dxil_nir_lower_double_math); 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci if (key->fs.multisample_disabled) 141bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_disable_multisampling); 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci struct nir_to_dxil_options opts = {}; 144bf215546Sopenharmony_ci opts.interpolate_at_vertex = screen->have_load_at_vertex; 145bf215546Sopenharmony_ci opts.lower_int16 = !screen->opts4.Native16BitShaderOpsSupported; 146bf215546Sopenharmony_ci opts.no_ubo0 = !shader->has_default_ubo0; 147bf215546Sopenharmony_ci opts.last_ubo_is_not_arrayed = shader->num_state_vars > 0; 148bf215546Sopenharmony_ci opts.provoking_vertex = key->fs.provoking_vertex; 149bf215546Sopenharmony_ci opts.input_clip_size = key->input_clip_size; 150bf215546Sopenharmony_ci opts.environment = DXIL_ENVIRONMENT_GL; 151bf215546Sopenharmony_ci opts.shader_model_max = SHADER_MODEL_6_2; 152bf215546Sopenharmony_ci#ifdef _WIN32 153bf215546Sopenharmony_ci opts.validator_version_max = dxil_get_validator_version(ctx->dxil_validator); 154bf215546Sopenharmony_ci#endif 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci struct blob tmp; 157bf215546Sopenharmony_ci if (!nir_to_dxil(nir, &opts, &tmp)) { 158bf215546Sopenharmony_ci debug_printf("D3D12: nir_to_dxil failed\n"); 159bf215546Sopenharmony_ci return NULL; 160bf215546Sopenharmony_ci } 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci // Non-ubo variables 163bf215546Sopenharmony_ci shader->begin_srv_binding = (UINT_MAX); 164bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, nir, nir_var_uniform) { 165bf215546Sopenharmony_ci auto type_no_array = glsl_without_array(var->type); 166bf215546Sopenharmony_ci if (glsl_type_is_texture(type_no_array)) { 167bf215546Sopenharmony_ci unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1; 168bf215546Sopenharmony_ci for (unsigned i = 0; i < count; ++i) { 169bf215546Sopenharmony_ci shader->srv_bindings[var->data.binding + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array)); 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci shader->begin_srv_binding = MIN2(var->data.binding, shader->begin_srv_binding); 172bf215546Sopenharmony_ci shader->end_srv_binding = MAX2(var->data.binding + count, shader->end_srv_binding); 173bf215546Sopenharmony_ci } 174bf215546Sopenharmony_ci } 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci nir_foreach_image_variable(var, nir) { 177bf215546Sopenharmony_ci auto type_no_array = glsl_without_array(var->type); 178bf215546Sopenharmony_ci unsigned count = glsl_type_is_array(var->type) ? glsl_get_aoa_size(var->type) : 1; 179bf215546Sopenharmony_ci for (unsigned i = 0; i < count; ++i) { 180bf215546Sopenharmony_ci shader->uav_bindings[var->data.driver_location + i].format = var->data.image.format; 181bf215546Sopenharmony_ci shader->uav_bindings[var->data.driver_location + i].dimension = resource_dimension(glsl_get_sampler_dim(type_no_array)); 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci } 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci // Ubo variables 186bf215546Sopenharmony_ci if(nir->info.num_ubos) { 187bf215546Sopenharmony_ci // Ignore state_vars ubo as it is bound as root constants 188bf215546Sopenharmony_ci unsigned num_ubo_bindings = nir->info.num_ubos - (shader->state_vars_used ? 1 : 0); 189bf215546Sopenharmony_ci for(unsigned i = shader->has_default_ubo0 ? 0 : 1; i < num_ubo_bindings; ++i) { 190bf215546Sopenharmony_ci shader->cb_bindings[shader->num_cb_bindings++].binding = i; 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci } 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci#ifdef _WIN32 195bf215546Sopenharmony_ci if (ctx->dxil_validator) { 196bf215546Sopenharmony_ci if (!(d3d12_debug & D3D12_DEBUG_EXPERIMENTAL)) { 197bf215546Sopenharmony_ci char *err; 198bf215546Sopenharmony_ci if (!dxil_validate_module(ctx->dxil_validator, tmp.data, 199bf215546Sopenharmony_ci tmp.size, &err) && err) { 200bf215546Sopenharmony_ci debug_printf( 201bf215546Sopenharmony_ci "== VALIDATION ERROR =============================================\n" 202bf215546Sopenharmony_ci "%s\n" 203bf215546Sopenharmony_ci "== END ==========================================================\n", 204bf215546Sopenharmony_ci err); 205bf215546Sopenharmony_ci ralloc_free(err); 206bf215546Sopenharmony_ci } 207bf215546Sopenharmony_ci } 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci if (d3d12_debug & D3D12_DEBUG_DISASS) { 210bf215546Sopenharmony_ci char *str = dxil_disasm_module(ctx->dxil_validator, tmp.data, 211bf215546Sopenharmony_ci tmp.size); 212bf215546Sopenharmony_ci fprintf(stderr, 213bf215546Sopenharmony_ci "== BEGIN SHADER ============================================\n" 214bf215546Sopenharmony_ci "%s\n" 215bf215546Sopenharmony_ci "== END SHADER ==============================================\n", 216bf215546Sopenharmony_ci str); 217bf215546Sopenharmony_ci ralloc_free(str); 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci } 220bf215546Sopenharmony_ci#endif 221bf215546Sopenharmony_ci 222bf215546Sopenharmony_ci blob_finish_get_buffer(&tmp, &shader->bytecode, &shader->bytecode_length); 223bf215546Sopenharmony_ci 224bf215546Sopenharmony_ci if (d3d12_debug & D3D12_DEBUG_DXIL) { 225bf215546Sopenharmony_ci char buf[256]; 226bf215546Sopenharmony_ci static int i; 227bf215546Sopenharmony_ci snprintf(buf, sizeof(buf), "dump%02d.dxil", i++); 228bf215546Sopenharmony_ci FILE *fp = fopen(buf, "wb"); 229bf215546Sopenharmony_ci fwrite(shader->bytecode, sizeof(char), shader->bytecode_length, fp); 230bf215546Sopenharmony_ci fclose(fp); 231bf215546Sopenharmony_ci fprintf(stderr, "wrote '%s'...\n", buf); 232bf215546Sopenharmony_ci } 233bf215546Sopenharmony_ci return shader; 234bf215546Sopenharmony_ci} 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_cistruct d3d12_selection_context { 237bf215546Sopenharmony_ci struct d3d12_context *ctx; 238bf215546Sopenharmony_ci bool needs_point_sprite_lowering; 239bf215546Sopenharmony_ci bool needs_vertex_reordering; 240bf215546Sopenharmony_ci unsigned provoking_vertex; 241bf215546Sopenharmony_ci bool alternate_tri; 242bf215546Sopenharmony_ci unsigned fill_mode_lowered; 243bf215546Sopenharmony_ci unsigned cull_mode_lowered; 244bf215546Sopenharmony_ci bool manual_depth_range; 245bf215546Sopenharmony_ci unsigned missing_dual_src_outputs; 246bf215546Sopenharmony_ci unsigned frag_result_color_lowering; 247bf215546Sopenharmony_ci const unsigned *variable_workgroup_size; 248bf215546Sopenharmony_ci}; 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_cistatic unsigned 251bf215546Sopenharmony_cimissing_dual_src_outputs(struct d3d12_context *ctx) 252bf215546Sopenharmony_ci{ 253bf215546Sopenharmony_ci if (!ctx->gfx_pipeline_state.blend->is_dual_src) 254bf215546Sopenharmony_ci return 0; 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; 257bf215546Sopenharmony_ci nir_shader *s = fs->initial; 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci unsigned indices_seen = 0; 260bf215546Sopenharmony_ci nir_foreach_function(function, s) { 261bf215546Sopenharmony_ci if (function->impl) { 262bf215546Sopenharmony_ci nir_foreach_block(block, function->impl) { 263bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 264bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 265bf215546Sopenharmony_ci continue; 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 268bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_store_deref) 269bf215546Sopenharmony_ci continue; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci nir_variable *var = nir_intrinsic_get_var(intr, 0); 272bf215546Sopenharmony_ci if (var->data.mode != nir_var_shader_out) 273bf215546Sopenharmony_ci continue; 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci unsigned index = var->data.index; 276bf215546Sopenharmony_ci if (var->data.location > FRAG_RESULT_DATA0) 277bf215546Sopenharmony_ci index = var->data.location - FRAG_RESULT_DATA0; 278bf215546Sopenharmony_ci else if (var->data.location != FRAG_RESULT_COLOR && 279bf215546Sopenharmony_ci var->data.location != FRAG_RESULT_DATA0) 280bf215546Sopenharmony_ci continue; 281bf215546Sopenharmony_ci 282bf215546Sopenharmony_ci indices_seen |= 1u << index; 283bf215546Sopenharmony_ci if ((indices_seen & 3) == 3) 284bf215546Sopenharmony_ci return 0; 285bf215546Sopenharmony_ci } 286bf215546Sopenharmony_ci } 287bf215546Sopenharmony_ci } 288bf215546Sopenharmony_ci } 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci return 3 & ~indices_seen; 291bf215546Sopenharmony_ci} 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_cistatic unsigned 294bf215546Sopenharmony_cifrag_result_color_lowering(struct d3d12_context *ctx) 295bf215546Sopenharmony_ci{ 296bf215546Sopenharmony_ci struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; 297bf215546Sopenharmony_ci assert(fs); 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci if (fs->initial->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR)) 300bf215546Sopenharmony_ci return ctx->fb.nr_cbufs > 1 ? ctx->fb.nr_cbufs : 0; 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_ci return 0; 303bf215546Sopenharmony_ci} 304bf215546Sopenharmony_ci 305bf215546Sopenharmony_cistatic bool 306bf215546Sopenharmony_cimanual_depth_range(struct d3d12_context *ctx) 307bf215546Sopenharmony_ci{ 308bf215546Sopenharmony_ci if (!d3d12_need_zero_one_depth_range(ctx)) 309bf215546Sopenharmony_ci return false; 310bf215546Sopenharmony_ci 311bf215546Sopenharmony_ci /** 312bf215546Sopenharmony_ci * If we can't use the D3D12 zero-one depth-range, we might have to apply 313bf215546Sopenharmony_ci * depth-range ourselves. 314bf215546Sopenharmony_ci * 315bf215546Sopenharmony_ci * Because we only need to override the depth-range to zero-one range in 316bf215546Sopenharmony_ci * the case where we write frag-depth, we only need to apply manual 317bf215546Sopenharmony_ci * depth-range to gl_FragCoord.z. 318bf215546Sopenharmony_ci * 319bf215546Sopenharmony_ci * No extra care is needed to be taken in the case where gl_FragDepth is 320bf215546Sopenharmony_ci * written conditionally, because the GLSL 4.60 spec states: 321bf215546Sopenharmony_ci * 322bf215546Sopenharmony_ci * If a shader statically assigns a value to gl_FragDepth, and there 323bf215546Sopenharmony_ci * is an execution path through the shader that does not set 324bf215546Sopenharmony_ci * gl_FragDepth, then the value of the fragment’s depth may be 325bf215546Sopenharmony_ci * undefined for executions of the shader that take that path. That 326bf215546Sopenharmony_ci * is, if the set of linked fragment shaders statically contain a 327bf215546Sopenharmony_ci * write to gl_FragDepth, then it is responsible for always writing 328bf215546Sopenharmony_ci * it. 329bf215546Sopenharmony_ci */ 330bf215546Sopenharmony_ci 331bf215546Sopenharmony_ci struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; 332bf215546Sopenharmony_ci return fs && fs->initial->info.inputs_read & VARYING_BIT_POS; 333bf215546Sopenharmony_ci} 334bf215546Sopenharmony_ci 335bf215546Sopenharmony_cistatic bool 336bf215546Sopenharmony_cineeds_edge_flag_fix(enum pipe_prim_type mode) 337bf215546Sopenharmony_ci{ 338bf215546Sopenharmony_ci return (mode == PIPE_PRIM_QUADS || 339bf215546Sopenharmony_ci mode == PIPE_PRIM_QUAD_STRIP || 340bf215546Sopenharmony_ci mode == PIPE_PRIM_POLYGON); 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_cistatic unsigned 344bf215546Sopenharmony_cifill_mode_lowered(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo) 345bf215546Sopenharmony_ci{ 346bf215546Sopenharmony_ci struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX]; 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL && 349bf215546Sopenharmony_ci !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) || 350bf215546Sopenharmony_ci ctx->gfx_pipeline_state.rast == NULL || 351bf215546Sopenharmony_ci (dinfo->mode != PIPE_PRIM_TRIANGLES && 352bf215546Sopenharmony_ci dinfo->mode != PIPE_PRIM_TRIANGLE_STRIP)) 353bf215546Sopenharmony_ci return PIPE_POLYGON_MODE_FILL; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci /* D3D12 supports line mode (wireframe) but doesn't support edge flags */ 356bf215546Sopenharmony_ci if (((ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_LINE && 357bf215546Sopenharmony_ci ctx->gfx_pipeline_state.rast->base.cull_face != PIPE_FACE_FRONT) || 358bf215546Sopenharmony_ci (ctx->gfx_pipeline_state.rast->base.fill_back == PIPE_POLYGON_MODE_LINE && 359bf215546Sopenharmony_ci ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_FRONT)) && 360bf215546Sopenharmony_ci (vs->initial->info.outputs_written & VARYING_BIT_EDGE || 361bf215546Sopenharmony_ci needs_edge_flag_fix(ctx->initial_api_prim))) 362bf215546Sopenharmony_ci return PIPE_POLYGON_MODE_LINE; 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_ci if (ctx->gfx_pipeline_state.rast->base.fill_front == PIPE_POLYGON_MODE_POINT) 365bf215546Sopenharmony_ci return PIPE_POLYGON_MODE_POINT; 366bf215546Sopenharmony_ci 367bf215546Sopenharmony_ci return PIPE_POLYGON_MODE_FILL; 368bf215546Sopenharmony_ci} 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_cistatic bool 371bf215546Sopenharmony_cihas_stream_out_for_streams(struct d3d12_context *ctx) 372bf215546Sopenharmony_ci{ 373bf215546Sopenharmony_ci unsigned mask = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->initial->info.gs.active_stream_mask & ~1; 374bf215546Sopenharmony_ci for (unsigned i = 0; i < ctx->gfx_pipeline_state.so_info.num_outputs; ++i) { 375bf215546Sopenharmony_ci unsigned stream = ctx->gfx_pipeline_state.so_info.output[i].stream; 376bf215546Sopenharmony_ci if (((1 << stream) & mask) && 377bf215546Sopenharmony_ci ctx->so_buffer_views[stream].SizeInBytes) 378bf215546Sopenharmony_ci return true; 379bf215546Sopenharmony_ci } 380bf215546Sopenharmony_ci return false; 381bf215546Sopenharmony_ci} 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_cistatic bool 384bf215546Sopenharmony_cineeds_point_sprite_lowering(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo) 385bf215546Sopenharmony_ci{ 386bf215546Sopenharmony_ci struct d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX]; 387bf215546Sopenharmony_ci struct d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; 388bf215546Sopenharmony_ci 389bf215546Sopenharmony_ci if (gs != NULL && !gs->is_variant) { 390bf215546Sopenharmony_ci /* There is an user GS; Check if it outputs points with PSIZE */ 391bf215546Sopenharmony_ci return (gs->initial->info.gs.output_primitive == GL_POINTS && 392bf215546Sopenharmony_ci (gs->initial->info.outputs_written & VARYING_BIT_PSIZ || 393bf215546Sopenharmony_ci ctx->gfx_pipeline_state.rast->base.point_size > 1.0) && 394bf215546Sopenharmony_ci (gs->initial->info.gs.active_stream_mask == 1 || 395bf215546Sopenharmony_ci !has_stream_out_for_streams(ctx))); 396bf215546Sopenharmony_ci } else { 397bf215546Sopenharmony_ci /* No user GS; check if we are drawing wide points */ 398bf215546Sopenharmony_ci return ((dinfo->mode == PIPE_PRIM_POINTS || 399bf215546Sopenharmony_ci fill_mode_lowered(ctx, dinfo) == PIPE_POLYGON_MODE_POINT) && 400bf215546Sopenharmony_ci (ctx->gfx_pipeline_state.rast->base.point_size > 1.0 || 401bf215546Sopenharmony_ci ctx->gfx_pipeline_state.rast->base.offset_point || 402bf215546Sopenharmony_ci (ctx->gfx_pipeline_state.rast->base.point_size_per_vertex && 403bf215546Sopenharmony_ci vs->initial->info.outputs_written & VARYING_BIT_PSIZ)) && 404bf215546Sopenharmony_ci (vs->initial->info.outputs_written & VARYING_BIT_POS)); 405bf215546Sopenharmony_ci } 406bf215546Sopenharmony_ci} 407bf215546Sopenharmony_ci 408bf215546Sopenharmony_cistatic unsigned 409bf215546Sopenharmony_cicull_mode_lowered(struct d3d12_context *ctx, unsigned fill_mode) 410bf215546Sopenharmony_ci{ 411bf215546Sopenharmony_ci if ((ctx->gfx_stages[PIPE_SHADER_GEOMETRY] != NULL && 412bf215546Sopenharmony_ci !ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant) || 413bf215546Sopenharmony_ci ctx->gfx_pipeline_state.rast == NULL || 414bf215546Sopenharmony_ci ctx->gfx_pipeline_state.rast->base.cull_face == PIPE_FACE_NONE) 415bf215546Sopenharmony_ci return PIPE_FACE_NONE; 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_ci return ctx->gfx_pipeline_state.rast->base.cull_face; 418bf215546Sopenharmony_ci} 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_cistatic unsigned 421bf215546Sopenharmony_ciget_provoking_vertex(struct d3d12_selection_context *sel_ctx, bool *alternate, const struct pipe_draw_info *dinfo) 422bf215546Sopenharmony_ci{ 423bf215546Sopenharmony_ci if (dinfo->mode == GL_PATCHES) { 424bf215546Sopenharmony_ci *alternate = false; 425bf215546Sopenharmony_ci return 0; 426bf215546Sopenharmony_ci } 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci struct d3d12_shader_selector *vs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_VERTEX]; 429bf215546Sopenharmony_ci struct d3d12_shader_selector *gs = sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; 430bf215546Sopenharmony_ci struct d3d12_shader_selector *last_vertex_stage = gs && !gs->is_variant ? gs : vs; 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci /* Make sure GL prims match Gallium prims */ 433bf215546Sopenharmony_ci STATIC_ASSERT(GL_POINTS == PIPE_PRIM_POINTS); 434bf215546Sopenharmony_ci STATIC_ASSERT(GL_LINES == PIPE_PRIM_LINES); 435bf215546Sopenharmony_ci STATIC_ASSERT(GL_LINE_STRIP == PIPE_PRIM_LINE_STRIP); 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci enum pipe_prim_type mode; 438bf215546Sopenharmony_ci switch (last_vertex_stage->stage) { 439bf215546Sopenharmony_ci case PIPE_SHADER_GEOMETRY: 440bf215546Sopenharmony_ci mode = (enum pipe_prim_type)last_vertex_stage->current->nir->info.gs.output_primitive; 441bf215546Sopenharmony_ci break; 442bf215546Sopenharmony_ci case PIPE_SHADER_VERTEX: 443bf215546Sopenharmony_ci mode = (enum pipe_prim_type)dinfo->mode; 444bf215546Sopenharmony_ci break; 445bf215546Sopenharmony_ci default: 446bf215546Sopenharmony_ci unreachable("Tesselation shaders are not supported"); 447bf215546Sopenharmony_ci } 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci bool flatshade_first = sel_ctx->ctx->gfx_pipeline_state.rast && 450bf215546Sopenharmony_ci sel_ctx->ctx->gfx_pipeline_state.rast->base.flatshade_first; 451bf215546Sopenharmony_ci *alternate = (mode == GL_TRIANGLE_STRIP || mode == GL_TRIANGLE_STRIP_ADJACENCY) && 452bf215546Sopenharmony_ci (!gs || gs->is_variant || 453bf215546Sopenharmony_ci gs->initial->info.gs.vertices_out > u_prim_vertex_count(mode)->min); 454bf215546Sopenharmony_ci return flatshade_first ? 0 : u_prim_vertex_count(mode)->min - 1; 455bf215546Sopenharmony_ci} 456bf215546Sopenharmony_ci 457bf215546Sopenharmony_cistatic bool 458bf215546Sopenharmony_cihas_flat_varyings(struct d3d12_context *ctx) 459bf215546Sopenharmony_ci{ 460bf215546Sopenharmony_ci struct d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci if (!fs || !fs->current) 463bf215546Sopenharmony_ci return false; 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci nir_foreach_variable_with_modes(input, fs->current->nir, 466bf215546Sopenharmony_ci nir_var_shader_in) { 467bf215546Sopenharmony_ci if (input->data.interpolation == INTERP_MODE_FLAT && 468bf215546Sopenharmony_ci /* Disregard sysvals */ 469bf215546Sopenharmony_ci (input->data.location >= VARYING_SLOT_VAR0 || 470bf215546Sopenharmony_ci input->data.location <= VARYING_SLOT_TEX7)) 471bf215546Sopenharmony_ci return true; 472bf215546Sopenharmony_ci } 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci return false; 475bf215546Sopenharmony_ci} 476bf215546Sopenharmony_ci 477bf215546Sopenharmony_cistatic bool 478bf215546Sopenharmony_cineeds_vertex_reordering(struct d3d12_selection_context *sel_ctx, const struct pipe_draw_info *dinfo) 479bf215546Sopenharmony_ci{ 480bf215546Sopenharmony_ci struct d3d12_context *ctx = sel_ctx->ctx; 481bf215546Sopenharmony_ci bool flat = has_flat_varyings(ctx); 482bf215546Sopenharmony_ci bool xfb = ctx->gfx_pipeline_state.num_so_targets > 0; 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci if (fill_mode_lowered(ctx, dinfo) != PIPE_POLYGON_MODE_FILL) 485bf215546Sopenharmony_ci return false; 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci /* TODO add support for line primitives */ 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci /* When flat shading a triangle and provoking vertex is not the first one, we use load_at_vertex. 490bf215546Sopenharmony_ci If not available for this adapter, or if it's a triangle strip, we need to reorder the vertices */ 491bf215546Sopenharmony_ci if (flat && sel_ctx->provoking_vertex >= 2 && (!d3d12_screen(ctx->base.screen)->have_load_at_vertex || 492bf215546Sopenharmony_ci sel_ctx->alternate_tri)) 493bf215546Sopenharmony_ci return true; 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci /* When transform feedback is enabled and the output is alternating (triangle strip or triangle 496bf215546Sopenharmony_ci strip with adjacency), we need to reorder vertices to get the order expected by OpenGL. This 497bf215546Sopenharmony_ci only works when there is no flat shading involved. In that scenario, we don't care about 498bf215546Sopenharmony_ci the provoking vertex. */ 499bf215546Sopenharmony_ci if (xfb && !flat && sel_ctx->alternate_tri) { 500bf215546Sopenharmony_ci sel_ctx->provoking_vertex = 0; 501bf215546Sopenharmony_ci return true; 502bf215546Sopenharmony_ci } 503bf215546Sopenharmony_ci 504bf215546Sopenharmony_ci return false; 505bf215546Sopenharmony_ci} 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_cistatic nir_variable * 508bf215546Sopenharmony_cicreate_varying_from_info(nir_shader *nir, struct d3d12_varying_info *info, 509bf215546Sopenharmony_ci unsigned slot, unsigned slot_frac, nir_variable_mode mode, bool patch) 510bf215546Sopenharmony_ci{ 511bf215546Sopenharmony_ci nir_variable *var; 512bf215546Sopenharmony_ci char tmp[100]; 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_ci snprintf(tmp, ARRAY_SIZE(tmp), 515bf215546Sopenharmony_ci mode == nir_var_shader_in ? "in_%d" : "out_%d", 516bf215546Sopenharmony_ci info->slots[slot].vars[slot_frac].driver_location); 517bf215546Sopenharmony_ci var = nir_variable_create(nir, mode, info->slots[slot].types[slot_frac], tmp); 518bf215546Sopenharmony_ci var->data.location = slot; 519bf215546Sopenharmony_ci var->data.location_frac = slot_frac; 520bf215546Sopenharmony_ci var->data.driver_location = info->slots[slot].vars[slot_frac].driver_location; 521bf215546Sopenharmony_ci var->data.interpolation = info->slots[slot].vars[slot_frac].interpolation; 522bf215546Sopenharmony_ci var->data.patch = info->slots[slot].patch; 523bf215546Sopenharmony_ci var->data.compact = info->slots[slot].vars[slot_frac].compact; 524bf215546Sopenharmony_ci if (patch) 525bf215546Sopenharmony_ci var->data.location += VARYING_SLOT_PATCH0; 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci if (mode == nir_var_shader_out) 528bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_write_0_to_new_varying, var); 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_ci return var; 531bf215546Sopenharmony_ci} 532bf215546Sopenharmony_ci 533bf215546Sopenharmony_civoid 534bf215546Sopenharmony_cicreate_varyings_from_info(nir_shader *nir, struct d3d12_varying_info *info, 535bf215546Sopenharmony_ci unsigned slot, nir_variable_mode mode, bool patch) 536bf215546Sopenharmony_ci{ 537bf215546Sopenharmony_ci unsigned mask = info->slots[slot].location_frac_mask; 538bf215546Sopenharmony_ci while (mask) 539bf215546Sopenharmony_ci create_varying_from_info(nir, info, slot, u_bit_scan(&mask), mode, patch); 540bf215546Sopenharmony_ci} 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_cistatic void 543bf215546Sopenharmony_cifill_varyings(struct d3d12_varying_info *info, nir_shader *s, 544bf215546Sopenharmony_ci nir_variable_mode modes, uint64_t mask, bool patch) 545bf215546Sopenharmony_ci{ 546bf215546Sopenharmony_ci nir_foreach_variable_with_modes(var, s, modes) { 547bf215546Sopenharmony_ci unsigned slot = var->data.location; 548bf215546Sopenharmony_ci bool is_generic_patch = slot >= VARYING_SLOT_PATCH0; 549bf215546Sopenharmony_ci if (patch ^ is_generic_patch) 550bf215546Sopenharmony_ci continue; 551bf215546Sopenharmony_ci if (is_generic_patch) 552bf215546Sopenharmony_ci slot -= VARYING_SLOT_PATCH0; 553bf215546Sopenharmony_ci uint64_t slot_bit = BITFIELD64_BIT(slot); 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci if (!(mask & slot_bit)) 556bf215546Sopenharmony_ci continue; 557bf215546Sopenharmony_ci 558bf215546Sopenharmony_ci const struct glsl_type *type = var->type; 559bf215546Sopenharmony_ci if ((s->info.stage == MESA_SHADER_GEOMETRY || 560bf215546Sopenharmony_ci s->info.stage == MESA_SHADER_TESS_CTRL) && 561bf215546Sopenharmony_ci (modes & nir_var_shader_in) && 562bf215546Sopenharmony_ci glsl_type_is_array(type)) 563bf215546Sopenharmony_ci type = glsl_get_array_element(type); 564bf215546Sopenharmony_ci info->slots[slot].types[var->data.location_frac] = type; 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci info->slots[slot].patch = var->data.patch; 567bf215546Sopenharmony_ci auto& var_slot = info->slots[slot].vars[var->data.location_frac]; 568bf215546Sopenharmony_ci var_slot.driver_location = var->data.driver_location; 569bf215546Sopenharmony_ci var_slot.interpolation = var->data.interpolation; 570bf215546Sopenharmony_ci var_slot.compact = var->data.compact; 571bf215546Sopenharmony_ci info->mask |= slot_bit; 572bf215546Sopenharmony_ci info->slots[slot].location_frac_mask |= (1 << var->data.location_frac); 573bf215546Sopenharmony_ci } 574bf215546Sopenharmony_ci} 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_cistatic void 577bf215546Sopenharmony_cifill_flat_varyings(struct d3d12_gs_variant_key *key, d3d12_shader_selector *fs) 578bf215546Sopenharmony_ci{ 579bf215546Sopenharmony_ci if (!fs || !fs->current) 580bf215546Sopenharmony_ci return; 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci nir_foreach_variable_with_modes(input, fs->current->nir, 583bf215546Sopenharmony_ci nir_var_shader_in) { 584bf215546Sopenharmony_ci if (input->data.interpolation == INTERP_MODE_FLAT) 585bf215546Sopenharmony_ci key->flat_varyings |= BITFIELD64_BIT(input->data.location); 586bf215546Sopenharmony_ci } 587bf215546Sopenharmony_ci} 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_cistatic void 590bf215546Sopenharmony_civalidate_geometry_shader_variant(struct d3d12_selection_context *sel_ctx) 591bf215546Sopenharmony_ci{ 592bf215546Sopenharmony_ci struct d3d12_context *ctx = sel_ctx->ctx; 593bf215546Sopenharmony_ci d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX]; 594bf215546Sopenharmony_ci d3d12_shader_selector *fs = ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; 595bf215546Sopenharmony_ci struct d3d12_gs_variant_key key = {0}; 596bf215546Sopenharmony_ci bool variant_needed = false; 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci d3d12_shader_selector *gs = ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci /* Nothing to do if there is a user geometry shader bound */ 601bf215546Sopenharmony_ci if (gs != NULL && !gs->is_variant) 602bf215546Sopenharmony_ci return; 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci /* Fill the geometry shader variant key */ 605bf215546Sopenharmony_ci if (sel_ctx->fill_mode_lowered != PIPE_POLYGON_MODE_FILL) { 606bf215546Sopenharmony_ci key.fill_mode = sel_ctx->fill_mode_lowered; 607bf215546Sopenharmony_ci key.cull_mode = sel_ctx->cull_mode_lowered; 608bf215546Sopenharmony_ci key.has_front_face = BITSET_TEST(fs->initial->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); 609bf215546Sopenharmony_ci if (key.cull_mode != PIPE_FACE_NONE || key.has_front_face) 610bf215546Sopenharmony_ci key.front_ccw = ctx->gfx_pipeline_state.rast->base.front_ccw ^ (ctx->flip_y < 0); 611bf215546Sopenharmony_ci key.edge_flag_fix = needs_edge_flag_fix(ctx->initial_api_prim); 612bf215546Sopenharmony_ci fill_flat_varyings(&key, fs); 613bf215546Sopenharmony_ci if (key.flat_varyings != 0) 614bf215546Sopenharmony_ci key.flatshade_first = ctx->gfx_pipeline_state.rast->base.flatshade_first; 615bf215546Sopenharmony_ci variant_needed = true; 616bf215546Sopenharmony_ci } else if (sel_ctx->needs_point_sprite_lowering) { 617bf215546Sopenharmony_ci key.passthrough = true; 618bf215546Sopenharmony_ci variant_needed = true; 619bf215546Sopenharmony_ci } else if (sel_ctx->needs_vertex_reordering) { 620bf215546Sopenharmony_ci /* TODO support cases where flat shading (pv != 0) and xfb are enabled */ 621bf215546Sopenharmony_ci key.provoking_vertex = sel_ctx->provoking_vertex; 622bf215546Sopenharmony_ci key.alternate_tri = sel_ctx->alternate_tri; 623bf215546Sopenharmony_ci variant_needed = true; 624bf215546Sopenharmony_ci } 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci if (variant_needed) { 627bf215546Sopenharmony_ci fill_varyings(&key.varyings, vs->initial, nir_var_shader_out, 628bf215546Sopenharmony_ci vs->initial->info.outputs_written, false); 629bf215546Sopenharmony_ci } 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci /* Check if the currently bound geometry shader variant is correct */ 632bf215546Sopenharmony_ci if (gs && memcmp(&gs->gs_key, &key, sizeof(key)) == 0) 633bf215546Sopenharmony_ci return; 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci /* Find/create the proper variant and bind it */ 636bf215546Sopenharmony_ci gs = variant_needed ? d3d12_get_gs_variant(ctx, &key) : NULL; 637bf215546Sopenharmony_ci ctx->gfx_stages[PIPE_SHADER_GEOMETRY] = gs; 638bf215546Sopenharmony_ci} 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_cistatic void 641bf215546Sopenharmony_civalidate_tess_ctrl_shader_variant(struct d3d12_selection_context *sel_ctx) 642bf215546Sopenharmony_ci{ 643bf215546Sopenharmony_ci struct d3d12_context *ctx = sel_ctx->ctx; 644bf215546Sopenharmony_ci d3d12_shader_selector *vs = ctx->gfx_stages[PIPE_SHADER_VERTEX]; 645bf215546Sopenharmony_ci d3d12_shader_selector *tcs = ctx->gfx_stages[PIPE_SHADER_TESS_CTRL]; 646bf215546Sopenharmony_ci d3d12_shader_selector *tes = ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]; 647bf215546Sopenharmony_ci struct d3d12_tcs_variant_key key = {0}; 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci /* Nothing to do if there is a user tess ctrl shader bound */ 650bf215546Sopenharmony_ci if (tcs != NULL && !tcs->is_variant) 651bf215546Sopenharmony_ci return; 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci bool variant_needed = tes != nullptr; 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_ci /* Fill the variant key */ 656bf215546Sopenharmony_ci if (variant_needed) { 657bf215546Sopenharmony_ci fill_varyings(&key.varyings, vs->initial, nir_var_shader_out, 658bf215546Sopenharmony_ci vs->initial->info.outputs_written, false); 659bf215546Sopenharmony_ci key.vertices_out = ctx->patch_vertices; 660bf215546Sopenharmony_ci } 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci /* Check if the currently bound tessellation control shader variant is correct */ 663bf215546Sopenharmony_ci if (tcs && memcmp(&tcs->tcs_key, &key, sizeof(key)) == 0) 664bf215546Sopenharmony_ci return; 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci /* Find/create the proper variant and bind it */ 667bf215546Sopenharmony_ci tcs = variant_needed ? d3d12_get_tcs_variant(ctx, &key) : NULL; 668bf215546Sopenharmony_ci ctx->gfx_stages[PIPE_SHADER_TESS_CTRL] = tcs; 669bf215546Sopenharmony_ci} 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_cistatic bool 672bf215546Sopenharmony_cid3d12_compare_varying_info(const d3d12_varying_info *expect, const d3d12_varying_info *have) 673bf215546Sopenharmony_ci{ 674bf215546Sopenharmony_ci if (expect->mask != have->mask) 675bf215546Sopenharmony_ci return false; 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci if (!expect->mask) 678bf215546Sopenharmony_ci return true; 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci /* 6 is a rough (wild) guess for a bulk memcmp cross-over point. When there 681bf215546Sopenharmony_ci * are a small number of slots present, individual memcmp is much faster. */ 682bf215546Sopenharmony_ci if (util_bitcount64(expect->mask) < 6) { 683bf215546Sopenharmony_ci uint64_t mask = expect->mask; 684bf215546Sopenharmony_ci while (mask) { 685bf215546Sopenharmony_ci int slot = u_bit_scan64(&mask); 686bf215546Sopenharmony_ci if (memcmp(&expect->slots[slot], &have->slots[slot], sizeof(have->slots[slot]))) 687bf215546Sopenharmony_ci return false; 688bf215546Sopenharmony_ci } 689bf215546Sopenharmony_ci 690bf215546Sopenharmony_ci return true; 691bf215546Sopenharmony_ci } 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci return !memcmp(expect, have, sizeof(struct d3d12_varying_info)); 694bf215546Sopenharmony_ci} 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_cistatic bool 697bf215546Sopenharmony_cid3d12_compare_shader_keys(const d3d12_shader_key *expect, const d3d12_shader_key *have) 698bf215546Sopenharmony_ci{ 699bf215546Sopenharmony_ci assert(expect->stage == have->stage); 700bf215546Sopenharmony_ci assert(expect); 701bf215546Sopenharmony_ci assert(have); 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci if (expect->hash != have->hash) 704bf215546Sopenharmony_ci return false; 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci /* Because we only add varyings we check that a shader has at least the expected in- 707bf215546Sopenharmony_ci * and outputs. */ 708bf215546Sopenharmony_ci 709bf215546Sopenharmony_ci if (!d3d12_compare_varying_info(&expect->required_varying_inputs, 710bf215546Sopenharmony_ci &have->required_varying_inputs) || 711bf215546Sopenharmony_ci expect->next_varying_inputs != have->next_varying_inputs) 712bf215546Sopenharmony_ci return false; 713bf215546Sopenharmony_ci 714bf215546Sopenharmony_ci if (!d3d12_compare_varying_info(&expect->required_varying_outputs, 715bf215546Sopenharmony_ci &have->required_varying_outputs) || 716bf215546Sopenharmony_ci expect->prev_varying_outputs != have->prev_varying_outputs) 717bf215546Sopenharmony_ci return false; 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci if (expect->stage == PIPE_SHADER_GEOMETRY) { 720bf215546Sopenharmony_ci if (expect->gs.writes_psize) { 721bf215546Sopenharmony_ci if (!have->gs.writes_psize || 722bf215546Sopenharmony_ci expect->gs.point_pos_stream_out != have->gs.point_pos_stream_out || 723bf215546Sopenharmony_ci expect->gs.sprite_coord_enable != have->gs.sprite_coord_enable || 724bf215546Sopenharmony_ci expect->gs.sprite_origin_upper_left != have->gs.sprite_origin_upper_left || 725bf215546Sopenharmony_ci expect->gs.point_size_per_vertex != have->gs.point_size_per_vertex) 726bf215546Sopenharmony_ci return false; 727bf215546Sopenharmony_ci } else if (have->gs.writes_psize) { 728bf215546Sopenharmony_ci return false; 729bf215546Sopenharmony_ci } 730bf215546Sopenharmony_ci if (expect->gs.primitive_id != have->gs.primitive_id || 731bf215546Sopenharmony_ci expect->gs.triangle_strip != have->gs.triangle_strip) 732bf215546Sopenharmony_ci return false; 733bf215546Sopenharmony_ci } else if (expect->stage == PIPE_SHADER_FRAGMENT) { 734bf215546Sopenharmony_ci if (expect->fs.frag_result_color_lowering != have->fs.frag_result_color_lowering || 735bf215546Sopenharmony_ci expect->fs.manual_depth_range != have->fs.manual_depth_range || 736bf215546Sopenharmony_ci expect->fs.polygon_stipple != have->fs.polygon_stipple || 737bf215546Sopenharmony_ci expect->fs.cast_to_uint != have->fs.cast_to_uint || 738bf215546Sopenharmony_ci expect->fs.cast_to_int != have->fs.cast_to_int || 739bf215546Sopenharmony_ci expect->fs.remap_front_facing != have->fs.remap_front_facing || 740bf215546Sopenharmony_ci expect->fs.missing_dual_src_outputs != have->fs.missing_dual_src_outputs || 741bf215546Sopenharmony_ci expect->fs.multisample_disabled != have->fs.multisample_disabled) 742bf215546Sopenharmony_ci return false; 743bf215546Sopenharmony_ci } else if (expect->stage == PIPE_SHADER_COMPUTE) { 744bf215546Sopenharmony_ci if (memcmp(expect->cs.workgroup_size, have->cs.workgroup_size, 745bf215546Sopenharmony_ci sizeof(have->cs.workgroup_size))) 746bf215546Sopenharmony_ci return false; 747bf215546Sopenharmony_ci } else if (expect->stage == PIPE_SHADER_TESS_CTRL) { 748bf215546Sopenharmony_ci if (expect->hs.primitive_mode != have->hs.primitive_mode || 749bf215546Sopenharmony_ci expect->hs.ccw != have->hs.ccw || 750bf215546Sopenharmony_ci expect->hs.point_mode != have->hs.point_mode || 751bf215546Sopenharmony_ci expect->hs.spacing != have->hs.spacing || 752bf215546Sopenharmony_ci expect->hs.patch_vertices_in != have->hs.patch_vertices_in || 753bf215546Sopenharmony_ci memcmp(&expect->hs.required_patch_outputs, &have->hs.required_patch_outputs, 754bf215546Sopenharmony_ci sizeof(struct d3d12_varying_info)) || 755bf215546Sopenharmony_ci expect->hs.next_patch_inputs != have->hs.next_patch_inputs) 756bf215546Sopenharmony_ci return false; 757bf215546Sopenharmony_ci } else if (expect->stage == PIPE_SHADER_TESS_EVAL) { 758bf215546Sopenharmony_ci if (expect->ds.tcs_vertices_out != have->ds.tcs_vertices_out || 759bf215546Sopenharmony_ci memcmp(&expect->ds.required_patch_inputs, &have->ds.required_patch_inputs, 760bf215546Sopenharmony_ci sizeof(struct d3d12_varying_info)) || 761bf215546Sopenharmony_ci expect->ds.prev_patch_outputs != have ->ds.prev_patch_outputs) 762bf215546Sopenharmony_ci return false; 763bf215546Sopenharmony_ci } 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci if (expect->input_clip_size != have->input_clip_size) 766bf215546Sopenharmony_ci return false; 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci if (expect->tex_saturate_s != have->tex_saturate_s || 769bf215546Sopenharmony_ci expect->tex_saturate_r != have->tex_saturate_r || 770bf215546Sopenharmony_ci expect->tex_saturate_t != have->tex_saturate_t) 771bf215546Sopenharmony_ci return false; 772bf215546Sopenharmony_ci 773bf215546Sopenharmony_ci if (expect->samples_int_textures != have->samples_int_textures) 774bf215546Sopenharmony_ci return false; 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci if (expect->n_texture_states != have->n_texture_states) 777bf215546Sopenharmony_ci return false; 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci if (expect->n_images != have->n_images) 780bf215546Sopenharmony_ci return false; 781bf215546Sopenharmony_ci 782bf215546Sopenharmony_ci if (memcmp(expect->tex_wrap_states, have->tex_wrap_states, 783bf215546Sopenharmony_ci expect->n_texture_states * sizeof(dxil_wrap_sampler_state))) 784bf215546Sopenharmony_ci return false; 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci if (memcmp(expect->swizzle_state, have->swizzle_state, 787bf215546Sopenharmony_ci expect->n_texture_states * sizeof(dxil_texture_swizzle_state))) 788bf215546Sopenharmony_ci return false; 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_ci if (memcmp(expect->sampler_compare_funcs, have->sampler_compare_funcs, 791bf215546Sopenharmony_ci expect->n_texture_states * sizeof(enum compare_func))) 792bf215546Sopenharmony_ci return false; 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci if (memcmp(expect->image_format_conversion, have->image_format_conversion, 795bf215546Sopenharmony_ci expect->n_images * sizeof(struct d3d12_image_format_conversion_info))) 796bf215546Sopenharmony_ci return false; 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci if (expect->invert_depth != have->invert_depth || 799bf215546Sopenharmony_ci expect->halfz != have->halfz) 800bf215546Sopenharmony_ci return false; 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci if (expect->stage == PIPE_SHADER_VERTEX) { 803bf215546Sopenharmony_ci if (expect->vs.needs_format_emulation != have->vs.needs_format_emulation) 804bf215546Sopenharmony_ci return false; 805bf215546Sopenharmony_ci 806bf215546Sopenharmony_ci if (expect->vs.needs_format_emulation) { 807bf215546Sopenharmony_ci if (memcmp(expect->vs.format_conversion, have->vs.format_conversion, 808bf215546Sopenharmony_ci PIPE_MAX_ATTRIBS * sizeof (enum pipe_format))) 809bf215546Sopenharmony_ci return false; 810bf215546Sopenharmony_ci } 811bf215546Sopenharmony_ci } 812bf215546Sopenharmony_ci 813bf215546Sopenharmony_ci if (expect->fs.provoking_vertex != have->fs.provoking_vertex) 814bf215546Sopenharmony_ci return false; 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci return true; 817bf215546Sopenharmony_ci} 818bf215546Sopenharmony_ci 819bf215546Sopenharmony_cistatic uint32_t 820bf215546Sopenharmony_cid3d12_shader_key_hash(const d3d12_shader_key *key) 821bf215546Sopenharmony_ci{ 822bf215546Sopenharmony_ci uint32_t hash; 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci hash = (uint32_t)key->stage; 825bf215546Sopenharmony_ci hash += key->required_varying_inputs.mask; 826bf215546Sopenharmony_ci hash += key->required_varying_outputs.mask; 827bf215546Sopenharmony_ci hash += key->next_varying_inputs; 828bf215546Sopenharmony_ci hash += key->prev_varying_outputs; 829bf215546Sopenharmony_ci switch (key->stage) { 830bf215546Sopenharmony_ci case PIPE_SHADER_VERTEX: 831bf215546Sopenharmony_ci /* (Probably) not worth the bit extraction for needs_format_emulation and 832bf215546Sopenharmony_ci * the rest of the the format_conversion data is large. Don't bother 833bf215546Sopenharmony_ci * hashing for now until this is shown to be worthwhile. */ 834bf215546Sopenharmony_ci break; 835bf215546Sopenharmony_ci case PIPE_SHADER_GEOMETRY: 836bf215546Sopenharmony_ci hash = _mesa_hash_data_with_seed(&key->gs, sizeof(key->gs), hash); 837bf215546Sopenharmony_ci break; 838bf215546Sopenharmony_ci case PIPE_SHADER_FRAGMENT: 839bf215546Sopenharmony_ci hash = _mesa_hash_data_with_seed(&key->fs, sizeof(key->fs), hash); 840bf215546Sopenharmony_ci break; 841bf215546Sopenharmony_ci case PIPE_SHADER_COMPUTE: 842bf215546Sopenharmony_ci hash = _mesa_hash_data_with_seed(&key->cs, sizeof(key->cs), hash); 843bf215546Sopenharmony_ci break; 844bf215546Sopenharmony_ci case PIPE_SHADER_TESS_CTRL: 845bf215546Sopenharmony_ci hash += key->hs.next_patch_inputs; 846bf215546Sopenharmony_ci break; 847bf215546Sopenharmony_ci case PIPE_SHADER_TESS_EVAL: 848bf215546Sopenharmony_ci hash += key->ds.tcs_vertices_out; 849bf215546Sopenharmony_ci hash += key->ds.prev_patch_outputs; 850bf215546Sopenharmony_ci break; 851bf215546Sopenharmony_ci default: 852bf215546Sopenharmony_ci /* No type specific information to hash for other stages. */ 853bf215546Sopenharmony_ci break; 854bf215546Sopenharmony_ci } 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci hash += key->n_texture_states; 857bf215546Sopenharmony_ci hash += key->n_images; 858bf215546Sopenharmony_ci return hash; 859bf215546Sopenharmony_ci} 860bf215546Sopenharmony_ci 861bf215546Sopenharmony_cistatic void 862bf215546Sopenharmony_cid3d12_fill_shader_key(struct d3d12_selection_context *sel_ctx, 863bf215546Sopenharmony_ci d3d12_shader_key *key, d3d12_shader_selector *sel, 864bf215546Sopenharmony_ci d3d12_shader_selector *prev, d3d12_shader_selector *next) 865bf215546Sopenharmony_ci{ 866bf215546Sopenharmony_ci pipe_shader_type stage = sel->stage; 867bf215546Sopenharmony_ci 868bf215546Sopenharmony_ci uint64_t system_generated_in_values = 869bf215546Sopenharmony_ci VARYING_BIT_PNTC | 870bf215546Sopenharmony_ci VARYING_BIT_PRIMITIVE_ID; 871bf215546Sopenharmony_ci 872bf215546Sopenharmony_ci uint64_t system_out_values = 873bf215546Sopenharmony_ci VARYING_BIT_CLIP_DIST0 | 874bf215546Sopenharmony_ci VARYING_BIT_CLIP_DIST1; 875bf215546Sopenharmony_ci 876bf215546Sopenharmony_ci memset(key, 0, sizeof(d3d12_shader_key)); 877bf215546Sopenharmony_ci key->stage = stage; 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci if (prev) { 880bf215546Sopenharmony_ci /* We require as inputs what the previous stage has written, 881bf215546Sopenharmony_ci * except certain system values */ 882bf215546Sopenharmony_ci if (stage == PIPE_SHADER_FRAGMENT || stage == PIPE_SHADER_GEOMETRY) 883bf215546Sopenharmony_ci system_out_values |= VARYING_BIT_POS; 884bf215546Sopenharmony_ci if (stage == PIPE_SHADER_FRAGMENT) 885bf215546Sopenharmony_ci system_out_values |= VARYING_BIT_PSIZ | VARYING_BIT_VIEWPORT | VARYING_BIT_LAYER; 886bf215546Sopenharmony_ci uint64_t mask = prev->current->nir->info.outputs_written & ~system_out_values; 887bf215546Sopenharmony_ci fill_varyings(&key->required_varying_inputs, prev->current->nir, 888bf215546Sopenharmony_ci nir_var_shader_out, mask, false); 889bf215546Sopenharmony_ci key->prev_varying_outputs = prev->current->nir->info.outputs_written; 890bf215546Sopenharmony_ci 891bf215546Sopenharmony_ci if (stage == PIPE_SHADER_TESS_EVAL) { 892bf215546Sopenharmony_ci uint32_t patch_mask = prev->current->nir->info.patch_outputs_written; 893bf215546Sopenharmony_ci fill_varyings(&key->ds.required_patch_inputs, prev->current->nir, 894bf215546Sopenharmony_ci nir_var_shader_out, patch_mask, true); 895bf215546Sopenharmony_ci key->ds.prev_patch_outputs = patch_mask; 896bf215546Sopenharmony_ci } 897bf215546Sopenharmony_ci 898bf215546Sopenharmony_ci /* Set the provoking vertex based on the previous shader output. Only set the 899bf215546Sopenharmony_ci * key value if the driver actually supports changing the provoking vertex though */ 900bf215546Sopenharmony_ci if (stage == PIPE_SHADER_FRAGMENT && sel_ctx->ctx->gfx_pipeline_state.rast && 901bf215546Sopenharmony_ci !sel_ctx->needs_vertex_reordering && 902bf215546Sopenharmony_ci d3d12_screen(sel_ctx->ctx->base.screen)->have_load_at_vertex) 903bf215546Sopenharmony_ci key->fs.provoking_vertex = sel_ctx->provoking_vertex; 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_ci /* Get the input clip distance size. The info's clip_distance_array_size corresponds 906bf215546Sopenharmony_ci * to the output, and in cases of TES or GS you could have differently-sized inputs 907bf215546Sopenharmony_ci * and outputs. For FS, there is no output, so it's repurposed to mean input. 908bf215546Sopenharmony_ci */ 909bf215546Sopenharmony_ci if (stage != PIPE_SHADER_FRAGMENT) 910bf215546Sopenharmony_ci key->input_clip_size = prev->current->nir->info.clip_distance_array_size; 911bf215546Sopenharmony_ci } 912bf215546Sopenharmony_ci 913bf215546Sopenharmony_ci /* We require as outputs what the next stage reads, 914bf215546Sopenharmony_ci * except certain system values */ 915bf215546Sopenharmony_ci if (next) { 916bf215546Sopenharmony_ci if (!next->is_variant) { 917bf215546Sopenharmony_ci if (stage == PIPE_SHADER_VERTEX) 918bf215546Sopenharmony_ci system_generated_in_values |= VARYING_BIT_POS; 919bf215546Sopenharmony_ci uint64_t mask = next->current->nir->info.inputs_read & ~system_generated_in_values; 920bf215546Sopenharmony_ci fill_varyings(&key->required_varying_outputs, next->current->nir, 921bf215546Sopenharmony_ci nir_var_shader_in, mask, false); 922bf215546Sopenharmony_ci 923bf215546Sopenharmony_ci if (stage == PIPE_SHADER_TESS_CTRL) { 924bf215546Sopenharmony_ci uint32_t patch_mask = next->current->nir->info.patch_outputs_read; 925bf215546Sopenharmony_ci fill_varyings(&key->hs.required_patch_outputs, prev->current->nir, 926bf215546Sopenharmony_ci nir_var_shader_in, patch_mask, true); 927bf215546Sopenharmony_ci key->hs.next_patch_inputs = patch_mask; 928bf215546Sopenharmony_ci } 929bf215546Sopenharmony_ci } 930bf215546Sopenharmony_ci key->next_varying_inputs = next->current->nir->info.inputs_read; 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_ci } 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci if (stage == PIPE_SHADER_GEOMETRY || 935bf215546Sopenharmony_ci ((stage == PIPE_SHADER_VERTEX || stage == PIPE_SHADER_TESS_EVAL) && 936bf215546Sopenharmony_ci (!next || next->stage == PIPE_SHADER_FRAGMENT))) { 937bf215546Sopenharmony_ci key->last_vertex_processing_stage = 1; 938bf215546Sopenharmony_ci key->invert_depth = sel_ctx->ctx->reverse_depth_range; 939bf215546Sopenharmony_ci key->halfz = sel_ctx->ctx->gfx_pipeline_state.rast ? 940bf215546Sopenharmony_ci sel_ctx->ctx->gfx_pipeline_state.rast->base.clip_halfz : false; 941bf215546Sopenharmony_ci if (sel_ctx->ctx->pstipple.enabled && 942bf215546Sopenharmony_ci sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable) 943bf215546Sopenharmony_ci key->next_varying_inputs |= VARYING_BIT_POS; 944bf215546Sopenharmony_ci } 945bf215546Sopenharmony_ci 946bf215546Sopenharmony_ci if (stage == PIPE_SHADER_GEOMETRY && sel_ctx->ctx->gfx_pipeline_state.rast) { 947bf215546Sopenharmony_ci struct pipe_rasterizer_state *rast = &sel_ctx->ctx->gfx_pipeline_state.rast->base; 948bf215546Sopenharmony_ci if (sel_ctx->needs_point_sprite_lowering) { 949bf215546Sopenharmony_ci key->gs.writes_psize = 1; 950bf215546Sopenharmony_ci key->gs.point_size_per_vertex = rast->point_size_per_vertex; 951bf215546Sopenharmony_ci key->gs.sprite_coord_enable = rast->sprite_coord_enable; 952bf215546Sopenharmony_ci key->gs.sprite_origin_upper_left = (rast->sprite_coord_mode != PIPE_SPRITE_COORD_LOWER_LEFT); 953bf215546Sopenharmony_ci if (sel_ctx->ctx->flip_y < 0) 954bf215546Sopenharmony_ci key->gs.sprite_origin_upper_left = !key->gs.sprite_origin_upper_left; 955bf215546Sopenharmony_ci key->gs.aa_point = rast->point_smooth; 956bf215546Sopenharmony_ci key->gs.stream_output_factor = 6; 957bf215546Sopenharmony_ci } else if (sel_ctx->fill_mode_lowered == PIPE_POLYGON_MODE_LINE) { 958bf215546Sopenharmony_ci key->gs.stream_output_factor = 2; 959bf215546Sopenharmony_ci } else if (sel_ctx->needs_vertex_reordering && !sel->is_variant) { 960bf215546Sopenharmony_ci key->gs.triangle_strip = 1; 961bf215546Sopenharmony_ci } 962bf215546Sopenharmony_ci 963bf215546Sopenharmony_ci if (sel->is_variant && next && next->initial->info.inputs_read & VARYING_BIT_PRIMITIVE_ID) 964bf215546Sopenharmony_ci key->gs.primitive_id = 1; 965bf215546Sopenharmony_ci } else if (stage == PIPE_SHADER_FRAGMENT) { 966bf215546Sopenharmony_ci key->fs.missing_dual_src_outputs = sel_ctx->missing_dual_src_outputs; 967bf215546Sopenharmony_ci key->fs.frag_result_color_lowering = sel_ctx->frag_result_color_lowering; 968bf215546Sopenharmony_ci key->fs.manual_depth_range = sel_ctx->manual_depth_range; 969bf215546Sopenharmony_ci key->fs.polygon_stipple = sel_ctx->ctx->pstipple.enabled && 970bf215546Sopenharmony_ci sel_ctx->ctx->gfx_pipeline_state.rast->base.poly_stipple_enable; 971bf215546Sopenharmony_ci key->fs.multisample_disabled = sel_ctx->ctx->gfx_pipeline_state.rast && 972bf215546Sopenharmony_ci !sel_ctx->ctx->gfx_pipeline_state.rast->desc.MultisampleEnable; 973bf215546Sopenharmony_ci if (sel_ctx->ctx->gfx_pipeline_state.blend && 974bf215546Sopenharmony_ci sel_ctx->ctx->gfx_pipeline_state.blend->desc.RenderTarget[0].LogicOpEnable && 975bf215546Sopenharmony_ci !sel_ctx->ctx->gfx_pipeline_state.has_float_rtv) { 976bf215546Sopenharmony_ci key->fs.cast_to_uint = util_format_is_unorm(sel_ctx->ctx->fb.cbufs[0]->format); 977bf215546Sopenharmony_ci key->fs.cast_to_int = !key->fs.cast_to_uint; 978bf215546Sopenharmony_ci } 979bf215546Sopenharmony_ci } else if (stage == PIPE_SHADER_TESS_CTRL) { 980bf215546Sopenharmony_ci if (next && next->current->nir->info.stage == MESA_SHADER_TESS_EVAL) { 981bf215546Sopenharmony_ci key->hs.primitive_mode = next->current->nir->info.tess._primitive_mode; 982bf215546Sopenharmony_ci key->hs.ccw = next->current->nir->info.tess.ccw; 983bf215546Sopenharmony_ci key->hs.point_mode = next->current->nir->info.tess.point_mode; 984bf215546Sopenharmony_ci key->hs.spacing = next->current->nir->info.tess.spacing; 985bf215546Sopenharmony_ci } else { 986bf215546Sopenharmony_ci key->hs.primitive_mode = TESS_PRIMITIVE_QUADS; 987bf215546Sopenharmony_ci key->hs.ccw = true; 988bf215546Sopenharmony_ci key->hs.point_mode = false; 989bf215546Sopenharmony_ci key->hs.spacing = TESS_SPACING_EQUAL; 990bf215546Sopenharmony_ci } 991bf215546Sopenharmony_ci key->hs.patch_vertices_in = MAX2(sel_ctx->ctx->patch_vertices, 1); 992bf215546Sopenharmony_ci } else if (stage == PIPE_SHADER_TESS_EVAL) { 993bf215546Sopenharmony_ci if (prev && prev->current->nir->info.stage == MESA_SHADER_TESS_CTRL) 994bf215546Sopenharmony_ci key->ds.tcs_vertices_out = prev->current->nir->info.tess.tcs_vertices_out; 995bf215546Sopenharmony_ci else 996bf215546Sopenharmony_ci key->ds.tcs_vertices_out = 32; 997bf215546Sopenharmony_ci } 998bf215546Sopenharmony_ci 999bf215546Sopenharmony_ci if (sel->samples_int_textures) { 1000bf215546Sopenharmony_ci key->samples_int_textures = sel->samples_int_textures; 1001bf215546Sopenharmony_ci key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage]; 1002bf215546Sopenharmony_ci /* Copy only states with integer textures */ 1003bf215546Sopenharmony_ci for(int i = 0; i < key->n_texture_states; ++i) { 1004bf215546Sopenharmony_ci auto& wrap_state = sel_ctx->ctx->tex_wrap_states[stage][i]; 1005bf215546Sopenharmony_ci if (wrap_state.is_int_sampler) { 1006bf215546Sopenharmony_ci memcpy(&key->tex_wrap_states[i], &wrap_state, sizeof(wrap_state)); 1007bf215546Sopenharmony_ci key->swizzle_state[i] = sel_ctx->ctx->tex_swizzle_state[stage][i]; 1008bf215546Sopenharmony_ci } 1009bf215546Sopenharmony_ci } 1010bf215546Sopenharmony_ci } 1011bf215546Sopenharmony_ci 1012bf215546Sopenharmony_ci for (unsigned i = 0; i < sel_ctx->ctx->num_samplers[stage]; ++i) { 1013bf215546Sopenharmony_ci if (!sel_ctx->ctx->samplers[stage][i] || 1014bf215546Sopenharmony_ci sel_ctx->ctx->samplers[stage][i]->filter == PIPE_TEX_FILTER_NEAREST) 1015bf215546Sopenharmony_ci continue; 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci if (sel_ctx->ctx->samplers[stage][i]->wrap_r == PIPE_TEX_WRAP_CLAMP) 1018bf215546Sopenharmony_ci key->tex_saturate_r |= 1 << i; 1019bf215546Sopenharmony_ci if (sel_ctx->ctx->samplers[stage][i]->wrap_s == PIPE_TEX_WRAP_CLAMP) 1020bf215546Sopenharmony_ci key->tex_saturate_s |= 1 << i; 1021bf215546Sopenharmony_ci if (sel_ctx->ctx->samplers[stage][i]->wrap_t == PIPE_TEX_WRAP_CLAMP) 1022bf215546Sopenharmony_ci key->tex_saturate_t |= 1 << i; 1023bf215546Sopenharmony_ci } 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci if (sel->compare_with_lod_bias_grad) { 1026bf215546Sopenharmony_ci key->n_texture_states = sel_ctx->ctx->num_sampler_views[stage]; 1027bf215546Sopenharmony_ci memcpy(key->sampler_compare_funcs, sel_ctx->ctx->tex_compare_func[stage], 1028bf215546Sopenharmony_ci key->n_texture_states * sizeof(enum compare_func)); 1029bf215546Sopenharmony_ci memcpy(key->swizzle_state, sel_ctx->ctx->tex_swizzle_state[stage], 1030bf215546Sopenharmony_ci key->n_texture_states * sizeof(dxil_texture_swizzle_state)); 1031bf215546Sopenharmony_ci } 1032bf215546Sopenharmony_ci 1033bf215546Sopenharmony_ci if (stage == PIPE_SHADER_VERTEX && sel_ctx->ctx->gfx_pipeline_state.ves) { 1034bf215546Sopenharmony_ci key->vs.needs_format_emulation = sel_ctx->ctx->gfx_pipeline_state.ves->needs_format_emulation; 1035bf215546Sopenharmony_ci if (key->vs.needs_format_emulation) { 1036bf215546Sopenharmony_ci memcpy(key->vs.format_conversion, sel_ctx->ctx->gfx_pipeline_state.ves->format_conversion, 1037bf215546Sopenharmony_ci sel_ctx->ctx->gfx_pipeline_state.ves->num_elements * sizeof(enum pipe_format)); 1038bf215546Sopenharmony_ci } 1039bf215546Sopenharmony_ci } 1040bf215546Sopenharmony_ci 1041bf215546Sopenharmony_ci if (stage == PIPE_SHADER_FRAGMENT && 1042bf215546Sopenharmony_ci sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY] && 1043bf215546Sopenharmony_ci sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->is_variant && 1044bf215546Sopenharmony_ci sel_ctx->ctx->gfx_stages[PIPE_SHADER_GEOMETRY]->gs_key.has_front_face) { 1045bf215546Sopenharmony_ci key->fs.remap_front_facing = 1; 1046bf215546Sopenharmony_ci } 1047bf215546Sopenharmony_ci 1048bf215546Sopenharmony_ci if (stage == PIPE_SHADER_COMPUTE && sel_ctx->variable_workgroup_size) { 1049bf215546Sopenharmony_ci memcpy(key->cs.workgroup_size, sel_ctx->variable_workgroup_size, sizeof(key->cs.workgroup_size)); 1050bf215546Sopenharmony_ci } 1051bf215546Sopenharmony_ci 1052bf215546Sopenharmony_ci key->n_images = sel_ctx->ctx->num_image_views[stage]; 1053bf215546Sopenharmony_ci for (int i = 0; i < key->n_images; ++i) { 1054bf215546Sopenharmony_ci key->image_format_conversion[i].emulated_format = sel_ctx->ctx->image_view_emulation_formats[stage][i]; 1055bf215546Sopenharmony_ci if (key->image_format_conversion[i].emulated_format != PIPE_FORMAT_NONE) 1056bf215546Sopenharmony_ci key->image_format_conversion[i].view_format = sel_ctx->ctx->image_views[stage][i].format; 1057bf215546Sopenharmony_ci } 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci key->hash = d3d12_shader_key_hash(key); 1060bf215546Sopenharmony_ci} 1061bf215546Sopenharmony_ci 1062bf215546Sopenharmony_cistatic void 1063bf215546Sopenharmony_ciselect_shader_variant(struct d3d12_selection_context *sel_ctx, d3d12_shader_selector *sel, 1064bf215546Sopenharmony_ci d3d12_shader_selector *prev, d3d12_shader_selector *next) 1065bf215546Sopenharmony_ci{ 1066bf215546Sopenharmony_ci struct d3d12_context *ctx = sel_ctx->ctx; 1067bf215546Sopenharmony_ci d3d12_shader_key key; 1068bf215546Sopenharmony_ci nir_shader *new_nir_variant; 1069bf215546Sopenharmony_ci unsigned pstipple_binding = UINT32_MAX; 1070bf215546Sopenharmony_ci 1071bf215546Sopenharmony_ci d3d12_fill_shader_key(sel_ctx, &key, sel, prev, next); 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci /* Check for an existing variant */ 1074bf215546Sopenharmony_ci for (d3d12_shader *variant = sel->first; variant; 1075bf215546Sopenharmony_ci variant = variant->next_variant) { 1076bf215546Sopenharmony_ci 1077bf215546Sopenharmony_ci if (d3d12_compare_shader_keys(&key, &variant->key)) { 1078bf215546Sopenharmony_ci sel->current = variant; 1079bf215546Sopenharmony_ci return; 1080bf215546Sopenharmony_ci } 1081bf215546Sopenharmony_ci } 1082bf215546Sopenharmony_ci 1083bf215546Sopenharmony_ci /* Clone the NIR shader */ 1084bf215546Sopenharmony_ci new_nir_variant = nir_shader_clone(sel, sel->initial); 1085bf215546Sopenharmony_ci 1086bf215546Sopenharmony_ci /* Apply any needed lowering passes */ 1087bf215546Sopenharmony_ci if (key.gs.writes_psize) { 1088bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, d3d12_lower_point_sprite, 1089bf215546Sopenharmony_ci !key.gs.sprite_origin_upper_left, 1090bf215546Sopenharmony_ci key.gs.point_size_per_vertex, 1091bf215546Sopenharmony_ci key.gs.sprite_coord_enable, 1092bf215546Sopenharmony_ci key.next_varying_inputs); 1093bf215546Sopenharmony_ci 1094bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant); 1095bf215546Sopenharmony_ci nir_shader_gather_info(new_nir_variant, impl); 1096bf215546Sopenharmony_ci } 1097bf215546Sopenharmony_ci 1098bf215546Sopenharmony_ci if (key.gs.primitive_id) { 1099bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, d3d12_lower_primitive_id); 1100bf215546Sopenharmony_ci 1101bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant); 1102bf215546Sopenharmony_ci nir_shader_gather_info(new_nir_variant, impl); 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci if (key.gs.triangle_strip) 1106bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, d3d12_lower_triangle_strip); 1107bf215546Sopenharmony_ci 1108bf215546Sopenharmony_ci if (key.fs.polygon_stipple) { 1109bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, nir_lower_pstipple_fs, 1110bf215546Sopenharmony_ci &pstipple_binding, 0, false); 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant); 1113bf215546Sopenharmony_ci nir_shader_gather_info(new_nir_variant, impl); 1114bf215546Sopenharmony_ci } 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci if (key.fs.remap_front_facing) { 1117bf215546Sopenharmony_ci d3d12_forward_front_face(new_nir_variant); 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_ci nir_function_impl *impl = nir_shader_get_entrypoint(new_nir_variant); 1120bf215546Sopenharmony_ci nir_shader_gather_info(new_nir_variant, impl); 1121bf215546Sopenharmony_ci } 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci if (key.fs.missing_dual_src_outputs) { 1124bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, d3d12_add_missing_dual_src_target, 1125bf215546Sopenharmony_ci key.fs.missing_dual_src_outputs); 1126bf215546Sopenharmony_ci } else if (key.fs.frag_result_color_lowering) { 1127bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, nir_lower_fragcolor, 1128bf215546Sopenharmony_ci key.fs.frag_result_color_lowering); 1129bf215546Sopenharmony_ci } 1130bf215546Sopenharmony_ci 1131bf215546Sopenharmony_ci if (key.fs.manual_depth_range) 1132bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, d3d12_lower_depth_range); 1133bf215546Sopenharmony_ci 1134bf215546Sopenharmony_ci if (sel->compare_with_lod_bias_grad) { 1135bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(dxil_texture_swizzle_state) == 1136bf215546Sopenharmony_ci sizeof(nir_lower_tex_shadow_swizzle)); 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, nir_lower_tex_shadow, key.n_texture_states, 1139bf215546Sopenharmony_ci key.sampler_compare_funcs, (nir_lower_tex_shadow_swizzle *)key.swizzle_state); 1140bf215546Sopenharmony_ci } 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_ci if (key.fs.cast_to_uint) 1143bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, false); 1144bf215546Sopenharmony_ci if (key.fs.cast_to_int) 1145bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, d3d12_lower_uint_cast, true); 1146bf215546Sopenharmony_ci 1147bf215546Sopenharmony_ci if (key.n_images) 1148bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, d3d12_lower_image_casts, key.image_format_conversion); 1149bf215546Sopenharmony_ci 1150bf215546Sopenharmony_ci if (sel->workgroup_size_variable) { 1151bf215546Sopenharmony_ci new_nir_variant->info.workgroup_size[0] = key.cs.workgroup_size[0]; 1152bf215546Sopenharmony_ci new_nir_variant->info.workgroup_size[1] = key.cs.workgroup_size[1]; 1153bf215546Sopenharmony_ci new_nir_variant->info.workgroup_size[2] = key.cs.workgroup_size[2]; 1154bf215546Sopenharmony_ci } 1155bf215546Sopenharmony_ci 1156bf215546Sopenharmony_ci if (new_nir_variant->info.stage == MESA_SHADER_TESS_CTRL) { 1157bf215546Sopenharmony_ci new_nir_variant->info.tess._primitive_mode = (tess_primitive_mode)key.hs.primitive_mode; 1158bf215546Sopenharmony_ci new_nir_variant->info.tess.ccw = key.hs.ccw; 1159bf215546Sopenharmony_ci new_nir_variant->info.tess.point_mode = key.hs.point_mode; 1160bf215546Sopenharmony_ci new_nir_variant->info.tess.spacing = key.hs.spacing; 1161bf215546Sopenharmony_ci 1162bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, dxil_nir_set_tcs_patches_in, key.hs.patch_vertices_in); 1163bf215546Sopenharmony_ci } else if (new_nir_variant->info.stage == MESA_SHADER_TESS_EVAL) { 1164bf215546Sopenharmony_ci new_nir_variant->info.tess.tcs_vertices_out = key.ds.tcs_vertices_out; 1165bf215546Sopenharmony_ci } 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_ci { 1168bf215546Sopenharmony_ci struct nir_lower_tex_options tex_options = { }; 1169bf215546Sopenharmony_ci tex_options.lower_txp = ~0u; /* No equivalent for textureProj */ 1170bf215546Sopenharmony_ci tex_options.lower_rect = true; 1171bf215546Sopenharmony_ci tex_options.lower_rect_offset = true; 1172bf215546Sopenharmony_ci tex_options.saturate_s = key.tex_saturate_s; 1173bf215546Sopenharmony_ci tex_options.saturate_r = key.tex_saturate_r; 1174bf215546Sopenharmony_ci tex_options.saturate_t = key.tex_saturate_t; 1175bf215546Sopenharmony_ci tex_options.lower_invalid_implicit_lod = true; 1176bf215546Sopenharmony_ci tex_options.lower_tg4_offsets = true; 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ci NIR_PASS_V(new_nir_variant, nir_lower_tex, &tex_options); 1179bf215546Sopenharmony_ci } 1180bf215546Sopenharmony_ci 1181bf215546Sopenharmony_ci /* Add the needed in and outputs, and re-sort */ 1182bf215546Sopenharmony_ci if (prev) { 1183bf215546Sopenharmony_ci uint64_t mask = key.required_varying_inputs.mask & ~new_nir_variant->info.inputs_read; 1184bf215546Sopenharmony_ci new_nir_variant->info.inputs_read |= mask; 1185bf215546Sopenharmony_ci while (mask) { 1186bf215546Sopenharmony_ci int slot = u_bit_scan64(&mask); 1187bf215546Sopenharmony_ci create_varyings_from_info(new_nir_variant, &key.required_varying_inputs, slot, nir_var_shader_in, false); 1188bf215546Sopenharmony_ci } 1189bf215546Sopenharmony_ci 1190bf215546Sopenharmony_ci if (sel->stage == PIPE_SHADER_TESS_EVAL) { 1191bf215546Sopenharmony_ci uint32_t patch_mask = (uint32_t)key.ds.required_patch_inputs.mask & ~new_nir_variant->info.patch_inputs_read; 1192bf215546Sopenharmony_ci new_nir_variant->info.patch_inputs_read |= patch_mask; 1193bf215546Sopenharmony_ci while (patch_mask) { 1194bf215546Sopenharmony_ci int slot = u_bit_scan(&patch_mask); 1195bf215546Sopenharmony_ci create_varyings_from_info(new_nir_variant, &key.ds.required_patch_inputs, slot, nir_var_shader_in, true); 1196bf215546Sopenharmony_ci } 1197bf215546Sopenharmony_ci } 1198bf215546Sopenharmony_ci dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_in, 1199bf215546Sopenharmony_ci key.prev_varying_outputs); 1200bf215546Sopenharmony_ci } 1201bf215546Sopenharmony_ci 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci if (next) { 1204bf215546Sopenharmony_ci uint64_t mask = key.required_varying_outputs.mask & ~new_nir_variant->info.outputs_written; 1205bf215546Sopenharmony_ci new_nir_variant->info.outputs_written |= mask; 1206bf215546Sopenharmony_ci while (mask) { 1207bf215546Sopenharmony_ci int slot = u_bit_scan64(&mask); 1208bf215546Sopenharmony_ci create_varyings_from_info(new_nir_variant, &key.required_varying_outputs, slot, nir_var_shader_out, false); 1209bf215546Sopenharmony_ci } 1210bf215546Sopenharmony_ci 1211bf215546Sopenharmony_ci if (sel->stage == PIPE_SHADER_TESS_CTRL) { 1212bf215546Sopenharmony_ci uint32_t patch_mask = (uint32_t)key.hs.required_patch_outputs.mask & ~new_nir_variant->info.patch_outputs_written; 1213bf215546Sopenharmony_ci new_nir_variant->info.patch_outputs_written |= patch_mask; 1214bf215546Sopenharmony_ci while (patch_mask) { 1215bf215546Sopenharmony_ci int slot = u_bit_scan(&patch_mask); 1216bf215546Sopenharmony_ci create_varyings_from_info(new_nir_variant, &key.ds.required_patch_inputs, slot, nir_var_shader_out, true); 1217bf215546Sopenharmony_ci } 1218bf215546Sopenharmony_ci } 1219bf215546Sopenharmony_ci dxil_reassign_driver_locations(new_nir_variant, nir_var_shader_out, 1220bf215546Sopenharmony_ci key.next_varying_inputs); 1221bf215546Sopenharmony_ci } 1222bf215546Sopenharmony_ci 1223bf215546Sopenharmony_ci d3d12_shader *new_variant = compile_nir(ctx, sel, &key, new_nir_variant); 1224bf215546Sopenharmony_ci assert(new_variant); 1225bf215546Sopenharmony_ci 1226bf215546Sopenharmony_ci /* keep track of polygon stipple texture binding */ 1227bf215546Sopenharmony_ci new_variant->pstipple_binding = pstipple_binding; 1228bf215546Sopenharmony_ci 1229bf215546Sopenharmony_ci /* prepend the new shader in the selector chain and pick it */ 1230bf215546Sopenharmony_ci new_variant->next_variant = sel->first; 1231bf215546Sopenharmony_ci sel->current = sel->first = new_variant; 1232bf215546Sopenharmony_ci} 1233bf215546Sopenharmony_ci 1234bf215546Sopenharmony_cistatic d3d12_shader_selector * 1235bf215546Sopenharmony_ciget_prev_shader(struct d3d12_context *ctx, pipe_shader_type current) 1236bf215546Sopenharmony_ci{ 1237bf215546Sopenharmony_ci switch (current) { 1238bf215546Sopenharmony_ci case PIPE_SHADER_VERTEX: 1239bf215546Sopenharmony_ci return NULL; 1240bf215546Sopenharmony_ci case PIPE_SHADER_FRAGMENT: 1241bf215546Sopenharmony_ci if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY]) 1242bf215546Sopenharmony_ci return ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; 1243bf215546Sopenharmony_ci FALLTHROUGH; 1244bf215546Sopenharmony_ci case PIPE_SHADER_GEOMETRY: 1245bf215546Sopenharmony_ci if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]) 1246bf215546Sopenharmony_ci return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]; 1247bf215546Sopenharmony_ci FALLTHROUGH; 1248bf215546Sopenharmony_ci case PIPE_SHADER_TESS_EVAL: 1249bf215546Sopenharmony_ci if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL]) 1250bf215546Sopenharmony_ci return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL]; 1251bf215546Sopenharmony_ci FALLTHROUGH; 1252bf215546Sopenharmony_ci case PIPE_SHADER_TESS_CTRL: 1253bf215546Sopenharmony_ci return ctx->gfx_stages[PIPE_SHADER_VERTEX]; 1254bf215546Sopenharmony_ci default: 1255bf215546Sopenharmony_ci unreachable("shader type not supported"); 1256bf215546Sopenharmony_ci } 1257bf215546Sopenharmony_ci} 1258bf215546Sopenharmony_ci 1259bf215546Sopenharmony_cistatic d3d12_shader_selector * 1260bf215546Sopenharmony_ciget_next_shader(struct d3d12_context *ctx, pipe_shader_type current) 1261bf215546Sopenharmony_ci{ 1262bf215546Sopenharmony_ci switch (current) { 1263bf215546Sopenharmony_ci case PIPE_SHADER_VERTEX: 1264bf215546Sopenharmony_ci if (ctx->gfx_stages[PIPE_SHADER_TESS_CTRL]) 1265bf215546Sopenharmony_ci return ctx->gfx_stages[PIPE_SHADER_TESS_CTRL]; 1266bf215546Sopenharmony_ci FALLTHROUGH; 1267bf215546Sopenharmony_ci case PIPE_SHADER_TESS_CTRL: 1268bf215546Sopenharmony_ci if (ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]) 1269bf215546Sopenharmony_ci return ctx->gfx_stages[PIPE_SHADER_TESS_EVAL]; 1270bf215546Sopenharmony_ci FALLTHROUGH; 1271bf215546Sopenharmony_ci case PIPE_SHADER_TESS_EVAL: 1272bf215546Sopenharmony_ci if (ctx->gfx_stages[PIPE_SHADER_GEOMETRY]) 1273bf215546Sopenharmony_ci return ctx->gfx_stages[PIPE_SHADER_GEOMETRY]; 1274bf215546Sopenharmony_ci FALLTHROUGH; 1275bf215546Sopenharmony_ci case PIPE_SHADER_GEOMETRY: 1276bf215546Sopenharmony_ci return ctx->gfx_stages[PIPE_SHADER_FRAGMENT]; 1277bf215546Sopenharmony_ci case PIPE_SHADER_FRAGMENT: 1278bf215546Sopenharmony_ci return NULL; 1279bf215546Sopenharmony_ci default: 1280bf215546Sopenharmony_ci unreachable("shader type not supported"); 1281bf215546Sopenharmony_ci } 1282bf215546Sopenharmony_ci} 1283bf215546Sopenharmony_ci 1284bf215546Sopenharmony_cienum tex_scan_flags { 1285bf215546Sopenharmony_ci TEX_SAMPLE_INTEGER_TEXTURE = 1 << 0, 1286bf215546Sopenharmony_ci TEX_CMP_WITH_LOD_BIAS_GRAD = 1 << 1, 1287bf215546Sopenharmony_ci TEX_SCAN_ALL_FLAGS = (1 << 2) - 1 1288bf215546Sopenharmony_ci}; 1289bf215546Sopenharmony_ci 1290bf215546Sopenharmony_cistatic unsigned 1291bf215546Sopenharmony_ciscan_texture_use(nir_shader *nir) 1292bf215546Sopenharmony_ci{ 1293bf215546Sopenharmony_ci unsigned result = 0; 1294bf215546Sopenharmony_ci nir_foreach_function(func, nir) { 1295bf215546Sopenharmony_ci nir_foreach_block(block, func->impl) { 1296bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 1297bf215546Sopenharmony_ci if (instr->type == nir_instr_type_tex) { 1298bf215546Sopenharmony_ci auto tex = nir_instr_as_tex(instr); 1299bf215546Sopenharmony_ci switch (tex->op) { 1300bf215546Sopenharmony_ci case nir_texop_txb: 1301bf215546Sopenharmony_ci case nir_texop_txl: 1302bf215546Sopenharmony_ci case nir_texop_txd: 1303bf215546Sopenharmony_ci if (tex->is_shadow) 1304bf215546Sopenharmony_ci result |= TEX_CMP_WITH_LOD_BIAS_GRAD; 1305bf215546Sopenharmony_ci FALLTHROUGH; 1306bf215546Sopenharmony_ci case nir_texop_tex: 1307bf215546Sopenharmony_ci if (tex->dest_type & (nir_type_int | nir_type_uint)) 1308bf215546Sopenharmony_ci result |= TEX_SAMPLE_INTEGER_TEXTURE; 1309bf215546Sopenharmony_ci default: 1310bf215546Sopenharmony_ci ; 1311bf215546Sopenharmony_ci } 1312bf215546Sopenharmony_ci } 1313bf215546Sopenharmony_ci if (TEX_SCAN_ALL_FLAGS == result) 1314bf215546Sopenharmony_ci return result; 1315bf215546Sopenharmony_ci } 1316bf215546Sopenharmony_ci } 1317bf215546Sopenharmony_ci } 1318bf215546Sopenharmony_ci return result; 1319bf215546Sopenharmony_ci} 1320bf215546Sopenharmony_ci 1321bf215546Sopenharmony_cistatic uint64_t 1322bf215546Sopenharmony_ciupdate_so_info(struct pipe_stream_output_info *so_info, 1323bf215546Sopenharmony_ci uint64_t outputs_written) 1324bf215546Sopenharmony_ci{ 1325bf215546Sopenharmony_ci uint64_t so_outputs = 0; 1326bf215546Sopenharmony_ci uint8_t reverse_map[64] = {0}; 1327bf215546Sopenharmony_ci unsigned slot = 0; 1328bf215546Sopenharmony_ci 1329bf215546Sopenharmony_ci while (outputs_written) 1330bf215546Sopenharmony_ci reverse_map[slot++] = u_bit_scan64(&outputs_written); 1331bf215546Sopenharmony_ci 1332bf215546Sopenharmony_ci for (unsigned i = 0; i < so_info->num_outputs; i++) { 1333bf215546Sopenharmony_ci struct pipe_stream_output *output = &so_info->output[i]; 1334bf215546Sopenharmony_ci 1335bf215546Sopenharmony_ci /* Map Gallium's condensed "slots" back to real VARYING_SLOT_* enums */ 1336bf215546Sopenharmony_ci output->register_index = reverse_map[output->register_index]; 1337bf215546Sopenharmony_ci 1338bf215546Sopenharmony_ci so_outputs |= 1ull << output->register_index; 1339bf215546Sopenharmony_ci } 1340bf215546Sopenharmony_ci 1341bf215546Sopenharmony_ci return so_outputs; 1342bf215546Sopenharmony_ci} 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_cistatic struct d3d12_shader_selector * 1345bf215546Sopenharmony_cid3d12_create_shader_impl(struct d3d12_context *ctx, 1346bf215546Sopenharmony_ci struct d3d12_shader_selector *sel, 1347bf215546Sopenharmony_ci struct nir_shader *nir, 1348bf215546Sopenharmony_ci struct d3d12_shader_selector *prev, 1349bf215546Sopenharmony_ci struct d3d12_shader_selector *next) 1350bf215546Sopenharmony_ci{ 1351bf215546Sopenharmony_ci unsigned tex_scan_result = scan_texture_use(nir); 1352bf215546Sopenharmony_ci sel->samples_int_textures = (tex_scan_result & TEX_SAMPLE_INTEGER_TEXTURE) != 0; 1353bf215546Sopenharmony_ci sel->compare_with_lod_bias_grad = (tex_scan_result & TEX_CMP_WITH_LOD_BIAS_GRAD) != 0; 1354bf215546Sopenharmony_ci sel->workgroup_size_variable = nir->info.workgroup_size_variable; 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_ci /* Integer cube maps are not supported in DirectX because sampling is not supported 1357bf215546Sopenharmony_ci * on integer textures and TextureLoad is not supported for cube maps, so we have to 1358bf215546Sopenharmony_ci * lower integer cube maps to be handled like 2D textures arrays*/ 1359bf215546Sopenharmony_ci NIR_PASS_V(nir, dxil_nir_lower_int_cubemaps, true); 1360bf215546Sopenharmony_ci 1361bf215546Sopenharmony_ci /* Keep this initial shader as the blue print for possible variants */ 1362bf215546Sopenharmony_ci sel->initial = nir; 1363bf215546Sopenharmony_ci 1364bf215546Sopenharmony_ci /* 1365bf215546Sopenharmony_ci * We must compile some shader here, because if the previous or a next shaders exists later 1366bf215546Sopenharmony_ci * when the shaders are bound, then the key evaluation in the shader selector will access 1367bf215546Sopenharmony_ci * the current variant of these prev and next shader, and we can only assign 1368bf215546Sopenharmony_ci * a current variant when it has been successfully compiled. 1369bf215546Sopenharmony_ci * 1370bf215546Sopenharmony_ci * For shaders that require lowering because certain instructions are not available 1371bf215546Sopenharmony_ci * and their emulation is state depended (like sampling an integer texture that must be 1372bf215546Sopenharmony_ci * emulated and needs handling of boundary conditions, or shadow compare sampling with LOD), 1373bf215546Sopenharmony_ci * we must go through the shader selector here to create a compilable variant. 1374bf215546Sopenharmony_ci * For shaders that are not depended on the state this is just compiling the original 1375bf215546Sopenharmony_ci * shader. 1376bf215546Sopenharmony_ci * 1377bf215546Sopenharmony_ci * TODO: get rid of having to compiling the shader here if it can be forseen that it will 1378bf215546Sopenharmony_ci * be thrown away (i.e. it depends on states that are likely to change before the shader is 1379bf215546Sopenharmony_ci * used for the first time) 1380bf215546Sopenharmony_ci */ 1381bf215546Sopenharmony_ci struct d3d12_selection_context sel_ctx = {0}; 1382bf215546Sopenharmony_ci sel_ctx.ctx = ctx; 1383bf215546Sopenharmony_ci select_shader_variant(&sel_ctx, sel, prev, next); 1384bf215546Sopenharmony_ci 1385bf215546Sopenharmony_ci if (!sel->current) { 1386bf215546Sopenharmony_ci ralloc_free(sel); 1387bf215546Sopenharmony_ci return NULL; 1388bf215546Sopenharmony_ci } 1389bf215546Sopenharmony_ci 1390bf215546Sopenharmony_ci return sel; 1391bf215546Sopenharmony_ci} 1392bf215546Sopenharmony_ci 1393bf215546Sopenharmony_cistruct d3d12_shader_selector * 1394bf215546Sopenharmony_cid3d12_create_shader(struct d3d12_context *ctx, 1395bf215546Sopenharmony_ci pipe_shader_type stage, 1396bf215546Sopenharmony_ci const struct pipe_shader_state *shader) 1397bf215546Sopenharmony_ci{ 1398bf215546Sopenharmony_ci struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector); 1399bf215546Sopenharmony_ci sel->stage = stage; 1400bf215546Sopenharmony_ci 1401bf215546Sopenharmony_ci struct nir_shader *nir = NULL; 1402bf215546Sopenharmony_ci 1403bf215546Sopenharmony_ci if (shader->type == PIPE_SHADER_IR_NIR) { 1404bf215546Sopenharmony_ci nir = (nir_shader *)shader->ir.nir; 1405bf215546Sopenharmony_ci } else { 1406bf215546Sopenharmony_ci assert(shader->type == PIPE_SHADER_IR_TGSI); 1407bf215546Sopenharmony_ci nir = tgsi_to_nir(shader->tokens, ctx->base.screen, false); 1408bf215546Sopenharmony_ci } 1409bf215546Sopenharmony_ci 1410bf215546Sopenharmony_ci nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 1411bf215546Sopenharmony_ci memcpy(&sel->so_info, &shader->stream_output, sizeof(sel->so_info)); 1412bf215546Sopenharmony_ci update_so_info(&sel->so_info, nir->info.outputs_written); 1413bf215546Sopenharmony_ci 1414bf215546Sopenharmony_ci assert(nir != NULL); 1415bf215546Sopenharmony_ci d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage); 1416bf215546Sopenharmony_ci d3d12_shader_selector *next = get_next_shader(ctx, sel->stage); 1417bf215546Sopenharmony_ci 1418bf215546Sopenharmony_ci NIR_PASS_V(nir, dxil_nir_split_clip_cull_distance); 1419bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_split_multistream_varyings); 1420bf215546Sopenharmony_ci 1421bf215546Sopenharmony_ci if (nir->info.stage != MESA_SHADER_VERTEX) 1422bf215546Sopenharmony_ci nir->info.inputs_read = 1423bf215546Sopenharmony_ci dxil_reassign_driver_locations(nir, nir_var_shader_in, 1424bf215546Sopenharmony_ci prev ? prev->current->nir->info.outputs_written : 0); 1425bf215546Sopenharmony_ci else 1426bf215546Sopenharmony_ci nir->info.inputs_read = dxil_sort_by_driver_location(nir, nir_var_shader_in); 1427bf215546Sopenharmony_ci 1428bf215546Sopenharmony_ci if (nir->info.stage != MESA_SHADER_FRAGMENT) { 1429bf215546Sopenharmony_ci nir->info.outputs_written = 1430bf215546Sopenharmony_ci dxil_reassign_driver_locations(nir, nir_var_shader_out, 1431bf215546Sopenharmony_ci next ? next->current->nir->info.inputs_read : 0); 1432bf215546Sopenharmony_ci } else { 1433bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_fragcoord_wtrans); 1434bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_lower_sample_pos); 1435bf215546Sopenharmony_ci dxil_sort_ps_outputs(nir); 1436bf215546Sopenharmony_ci } 1437bf215546Sopenharmony_ci 1438bf215546Sopenharmony_ci return d3d12_create_shader_impl(ctx, sel, nir, prev, next); 1439bf215546Sopenharmony_ci} 1440bf215546Sopenharmony_ci 1441bf215546Sopenharmony_cistruct d3d12_shader_selector * 1442bf215546Sopenharmony_cid3d12_create_compute_shader(struct d3d12_context *ctx, 1443bf215546Sopenharmony_ci const struct pipe_compute_state *shader) 1444bf215546Sopenharmony_ci{ 1445bf215546Sopenharmony_ci struct d3d12_shader_selector *sel = rzalloc(nullptr, d3d12_shader_selector); 1446bf215546Sopenharmony_ci sel->stage = PIPE_SHADER_COMPUTE; 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_ci struct nir_shader *nir = NULL; 1449bf215546Sopenharmony_ci 1450bf215546Sopenharmony_ci if (shader->ir_type == PIPE_SHADER_IR_NIR) { 1451bf215546Sopenharmony_ci nir = (nir_shader *)shader->prog; 1452bf215546Sopenharmony_ci } else { 1453bf215546Sopenharmony_ci assert(shader->ir_type == PIPE_SHADER_IR_TGSI); 1454bf215546Sopenharmony_ci nir = tgsi_to_nir(shader->prog, ctx->base.screen, false); 1455bf215546Sopenharmony_ci } 1456bf215546Sopenharmony_ci 1457bf215546Sopenharmony_ci nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); 1458bf215546Sopenharmony_ci 1459bf215546Sopenharmony_ci NIR_PASS_V(nir, d3d12_lower_compute_state_vars); 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_ci return d3d12_create_shader_impl(ctx, sel, nir, nullptr, nullptr); 1462bf215546Sopenharmony_ci} 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_civoid 1465bf215546Sopenharmony_cid3d12_select_shader_variants(struct d3d12_context *ctx, const struct pipe_draw_info *dinfo) 1466bf215546Sopenharmony_ci{ 1467bf215546Sopenharmony_ci static unsigned order[] = { 1468bf215546Sopenharmony_ci PIPE_SHADER_VERTEX, 1469bf215546Sopenharmony_ci PIPE_SHADER_TESS_CTRL, 1470bf215546Sopenharmony_ci PIPE_SHADER_TESS_EVAL, 1471bf215546Sopenharmony_ci PIPE_SHADER_GEOMETRY, 1472bf215546Sopenharmony_ci PIPE_SHADER_FRAGMENT 1473bf215546Sopenharmony_ci }; 1474bf215546Sopenharmony_ci struct d3d12_selection_context sel_ctx; 1475bf215546Sopenharmony_ci 1476bf215546Sopenharmony_ci sel_ctx.ctx = ctx; 1477bf215546Sopenharmony_ci sel_ctx.needs_point_sprite_lowering = needs_point_sprite_lowering(ctx, dinfo); 1478bf215546Sopenharmony_ci sel_ctx.fill_mode_lowered = fill_mode_lowered(ctx, dinfo); 1479bf215546Sopenharmony_ci sel_ctx.cull_mode_lowered = cull_mode_lowered(ctx, sel_ctx.fill_mode_lowered); 1480bf215546Sopenharmony_ci sel_ctx.provoking_vertex = get_provoking_vertex(&sel_ctx, &sel_ctx.alternate_tri, dinfo); 1481bf215546Sopenharmony_ci sel_ctx.needs_vertex_reordering = needs_vertex_reordering(&sel_ctx, dinfo); 1482bf215546Sopenharmony_ci sel_ctx.missing_dual_src_outputs = missing_dual_src_outputs(ctx); 1483bf215546Sopenharmony_ci sel_ctx.frag_result_color_lowering = frag_result_color_lowering(ctx); 1484bf215546Sopenharmony_ci sel_ctx.manual_depth_range = manual_depth_range(ctx); 1485bf215546Sopenharmony_ci 1486bf215546Sopenharmony_ci validate_geometry_shader_variant(&sel_ctx); 1487bf215546Sopenharmony_ci validate_tess_ctrl_shader_variant(&sel_ctx); 1488bf215546Sopenharmony_ci 1489bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(order); ++i) { 1490bf215546Sopenharmony_ci auto sel = ctx->gfx_stages[order[i]]; 1491bf215546Sopenharmony_ci if (!sel) 1492bf215546Sopenharmony_ci continue; 1493bf215546Sopenharmony_ci 1494bf215546Sopenharmony_ci d3d12_shader_selector *prev = get_prev_shader(ctx, sel->stage); 1495bf215546Sopenharmony_ci d3d12_shader_selector *next = get_next_shader(ctx, sel->stage); 1496bf215546Sopenharmony_ci 1497bf215546Sopenharmony_ci select_shader_variant(&sel_ctx, sel, prev, next); 1498bf215546Sopenharmony_ci } 1499bf215546Sopenharmony_ci} 1500bf215546Sopenharmony_ci 1501bf215546Sopenharmony_cistatic const unsigned * 1502bf215546Sopenharmony_ciworkgroup_size_variable(struct d3d12_context *ctx, 1503bf215546Sopenharmony_ci const struct pipe_grid_info *info) 1504bf215546Sopenharmony_ci{ 1505bf215546Sopenharmony_ci if (ctx->compute_state->workgroup_size_variable) 1506bf215546Sopenharmony_ci return info->block; 1507bf215546Sopenharmony_ci return nullptr; 1508bf215546Sopenharmony_ci} 1509bf215546Sopenharmony_ci 1510bf215546Sopenharmony_civoid 1511bf215546Sopenharmony_cid3d12_select_compute_shader_variants(struct d3d12_context *ctx, const struct pipe_grid_info *info) 1512bf215546Sopenharmony_ci{ 1513bf215546Sopenharmony_ci struct d3d12_selection_context sel_ctx = {}; 1514bf215546Sopenharmony_ci 1515bf215546Sopenharmony_ci sel_ctx.ctx = ctx; 1516bf215546Sopenharmony_ci sel_ctx.variable_workgroup_size = workgroup_size_variable(ctx, info); 1517bf215546Sopenharmony_ci 1518bf215546Sopenharmony_ci select_shader_variant(&sel_ctx, ctx->compute_state, nullptr, nullptr); 1519bf215546Sopenharmony_ci} 1520bf215546Sopenharmony_ci 1521bf215546Sopenharmony_civoid 1522bf215546Sopenharmony_cid3d12_shader_free(struct d3d12_shader_selector *sel) 1523bf215546Sopenharmony_ci{ 1524bf215546Sopenharmony_ci auto shader = sel->first; 1525bf215546Sopenharmony_ci while (shader) { 1526bf215546Sopenharmony_ci free(shader->bytecode); 1527bf215546Sopenharmony_ci shader = shader->next_variant; 1528bf215546Sopenharmony_ci } 1529bf215546Sopenharmony_ci ralloc_free(sel->initial); 1530bf215546Sopenharmony_ci ralloc_free(sel); 1531bf215546Sopenharmony_ci} 1532