/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

/* Enable or disable the gfx8/gfx9 PMA fix by writing the relevant cache-mode
 * register via MI_LOAD_REGISTER_IMM (CACHE_MODE_1 on gfx8, CACHE_MODE_0 on
 * gfx9), bracketed by the PIPE_CONTROL flushes the PRMs require around the
 * register write.  No-op when the requested state is already in effect, so
 * callers may invoke this unconditionally on every state flush.
 */
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
   if (cmd_buffer->state.pma_fix_enabled == enable)
      return;

   /* Record the new state first; the emission below is unconditional. */
   cmd_buffer->state.pma_fix_enabled = enable;

   /* According to the Broadwell PIPE_CONTROL documentation, software should
    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
    * prior to the LRI.  If stencil buffer writes are enabled, then a Render
    * Cache Flush is also necessary.
    *
    * The Skylake docs say to use a depth stall rather than a command
    * streamer stall.  However, the hardware seems to violently disagree.
    * A full command streamer stall seems to be needed in both cases.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DepthCacheFlushEnable = true;
      pc.CommandStreamerStallEnable = true;
      pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
      pc.TileCacheFlushEnable = true;

      /* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
       * be set with any PIPE_CONTROL with Depth Flush Enable bit set."
       */
      pc.DepthStallEnable = true;
#endif
   }

#if GFX_VER == 9

   /* gfx9: the STC PMA optimization lives in CACHE_MODE_0.  The *Mask bit
    * must be set for the hardware to latch the new value of the enable bit.
    */
   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
                   .STCPMAOptimizationEnable = enable,
                   .STCPMAOptimizationEnableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(CACHE_MODE_0_num);
      lri.DataDWord = cache_mode;
   }

#elif GFX_VER == 8

   /* gfx8: the NP PMA fix lives in CACHE_MODE_1; again the *Mask bits gate
    * which fields of the register the write actually updates.
    */
   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
                   .NPPMAFixEnable = enable,
                   .NPEarlyZFailsDisable = enable,
                   .NPPMAFixEnableMask = true,
                   .NPEarlyZFailsDisableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(CACHE_MODE_1_num);
      lri.DataDWord = cache_mode;
   }

#endif /* GFX_VER == 8 */

   /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
    * Flush bits is often necessary.  We do it regardless because it's easier.
    * The render cache flush is also necessary if stencil writes are enabled.
    *
    * Again, the Skylake docs give a different set of flushes but the BDW
    * flushes seem to work just as well.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DepthStallEnable = true;
      pc.DepthCacheFlushEnable = true;
      pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
      pc.TileCacheFlushEnable = true;
#endif
   }
}

/* Decide whether the Broadwell depth PMA fix should be enabled for the
 * current pipeline and (optimized) depth/stencil state.  This mirrors the
 * big expression from the BDW PRM for CACHE_MODE_1::NP_PMA_FIX_ENABLE,
 * quoted in full below; each check is annotated with the PRM term it
 * implements.  Pure predicate: reads cmd_buffer/pipeline state only.
 */
UNUSED static bool
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
                   const struct vk_depth_stencil_state *ds)
{
   assert(GFX_VER == 8);

   /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
    *
    *    SW must set this bit in order to enable this fix when following
    *    expression is TRUE.
    *
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
    *    (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    *    (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
    *    !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
    *    (3DSTATE_PS_EXTRA::PixelShaderValid) &&
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear) &&
    *    (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
    *    (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *       3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *       3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *       3DSTATE_PS_BLEND::AlphaTestEnable ||
    *       3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
    *      3DSTATE_WM::ForceKillPix != ForceOff &&
    *      ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
    *        3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
    *       (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *        3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
    *        3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
    *     (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    */

   /* These are always true:
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
    */

   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
    * and there is no harm.
    *
    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
    */
   if (!cmd_buffer->state.hiz_enabled)
      return false;

   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
      return false;

   /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   if (wm_prog_data->early_fragment_tests)
      return false;

   /* We never use anv_pipeline for HiZ ops so this is trivially true:
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
    */

   /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
   if (!ds->depth.test_enable)
      return false;

   /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *    3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *    3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *    3DSTATE_PS_BLEND::AlphaTestEnable ||
    *    3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
    *   3DSTATE_WM::ForceKillPix != ForceOff &&
    *   ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
    *     3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
    *    (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *     3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
    *     3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
    *  (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    *
    * pipeline->kill_pixel collapses the first parenthesized group above
    * (kill-pixel-ish shader outputs); the write_enable terms come straight
    * from the caller-optimized depth/stencil state.
    */
   return (pipeline->kill_pixel && (ds->depth.write_enable ||
                                    ds->stencil.write_enable)) ||
          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}

UNUSED static bool
200want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer, 201 const struct vk_depth_stencil_state *ds) 202{ 203 if (GFX_VER > 9) 204 return false; 205 assert(GFX_VER == 9); 206 207 /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable: 208 * 209 * Clearing this bit will force the STC cache to wait for pending 210 * retirement of pixels at the HZ-read stage and do the STC-test for 211 * Non-promoted, R-computed and Computed depth modes instead of 212 * postponing the STC-test to RCPFE. 213 * 214 * STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && 215 * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable 216 * 217 * STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && 218 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && 219 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE) 220 * 221 * COMP_STC_EN = STC_TEST_EN && 222 * 3DSTATE_PS_EXTRA::PixelShaderComputesStencil 223 * 224 * SW parses the pipeline states to generate the following logical 225 * signal indicating if PMA FIX can be enabled. 
226 * 227 * STC_PMA_OPT = 228 * 3DSTATE_WM::ForceThreadDispatch != 1 && 229 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) && 230 * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL && 231 * 3DSTATE_DEPTH_BUFFER::HIZ Enable && 232 * !(3DSTATE_WM::EDSC_Mode == 2) && 233 * 3DSTATE_PS_EXTRA::PixelShaderValid && 234 * !(3DSTATE_WM_HZ_OP::DepthBufferClear || 235 * 3DSTATE_WM_HZ_OP::DepthBufferResolve || 236 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || 237 * 3DSTATE_WM_HZ_OP::StencilBufferClear) && 238 * (COMP_STC_EN || STC_WRITE_EN) && 239 * ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels || 240 * 3DSTATE_WM::ForceKillPix == ON || 241 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || 242 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || 243 * 3DSTATE_PS_BLEND::AlphaTestEnable || 244 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) || 245 * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)) 246 */ 247 248 /* These are always true: 249 * 3DSTATE_WM::ForceThreadDispatch != 1 && 250 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) 251 */ 252 253 /* We only enable the PMA fix if we know for certain that HiZ is enabled. 254 * If we don't know whether HiZ is enabled or not, we disable the PMA fix 255 * and there is no harm. 
256 * 257 * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) && 258 * 3DSTATE_DEPTH_BUFFER::HIZ Enable 259 */ 260 if (!cmd_buffer->state.hiz_enabled) 261 return false; 262 263 /* We can't possibly know if HiZ is enabled without the depth attachment */ 264 ASSERTED const struct anv_image_view *d_iview = 265 cmd_buffer->state.gfx.depth_att.iview; 266 assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ); 267 268 /* 3DSTATE_PS_EXTRA::PixelShaderValid */ 269 struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 270 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) 271 return false; 272 273 /* !(3DSTATE_WM::EDSC_Mode == 2) */ 274 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline); 275 if (wm_prog_data->early_fragment_tests) 276 return false; 277 278 /* We never use anv_pipeline for HiZ ops so this is trivially true: 279 * !(3DSTATE_WM_HZ_OP::DepthBufferClear || 280 * 3DSTATE_WM_HZ_OP::DepthBufferResolve || 281 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable || 282 * 3DSTATE_WM_HZ_OP::StencilBufferClear) 283 */ 284 285 /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && 286 * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable 287 */ 288 const bool stc_test_en = ds->stencil.test_enable; 289 290 /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE && 291 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable && 292 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE) 293 */ 294 const bool stc_write_en = ds->stencil.write_enable; 295 296 /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */ 297 const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil; 298 299 /* COMP_STC_EN || STC_WRITE_EN */ 300 if (!(comp_stc_en || stc_write_en)) 301 return false; 302 303 /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels || 304 * 3DSTATE_WM::ForceKillPix == ON || 305 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget || 306 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable || 307 * 
3DSTATE_PS_BLEND::AlphaTestEnable || 308 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) || 309 * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF) 310 */ 311 return pipeline->kill_pixel || 312 wm_prog_data->computed_depth_mode != PSCDEPTH_OFF; 313} 314 315void 316genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer) 317{ 318 struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline; 319 const struct vk_dynamic_graphics_state *dyn = 320 &cmd_buffer->vk.dynamic_graphics_state; 321 322#if GFX_VER >= 11 323 if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate && 324 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR)) 325 genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr); 326#endif /* GFX_VER >= 11 */ 327 328 if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || 329 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) { 330 uint32_t sf_dw[GENX(3DSTATE_SF_length)]; 331 struct GENX(3DSTATE_SF) sf = { 332 GENX(3DSTATE_SF_header), 333 }; 334#if GFX_VER == 8 335 if (cmd_buffer->device->info.platform == INTEL_PLATFORM_CHV) { 336 sf.CHVLineWidth = dyn->rs.line.width; 337 } else { 338 sf.LineWidth = dyn->rs.line.width; 339 } 340#else 341 sf.LineWidth = dyn->rs.line.width, 342#endif 343 GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf); 344 anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf); 345 } 346 347 if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) || 348 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) || 349 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) || 350 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) || 351 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) || 352 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) { 353 /* Take dynamic primitive topology in to account with 354 * 3DSTATE_RASTER::APIMode 355 * 3DSTATE_RASTER::DXMultisampleRasterizationEnable 356 * 3DSTATE_RASTER::AntialiasingEnable 357 */ 358 
      uint32_t api_mode = 0;
      bool msaa_raster_enable = false;

      /* Resolve the effective polygon mode from the pipeline plus the
       * dynamic topology, then let the shared helpers pick APIMode /
       * multisample rasterization and antialiasing for it.
       */
      VkPolygonMode dynamic_raster_mode =
         genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
                                   dyn->ia.primitive_topology);

      genX(rasterization_mode)(dynamic_raster_mode,
                               pipeline->line_mode, dyn->rs.line.width,
                               &api_mode, &msaa_raster_enable);

      bool aa_enable = anv_rasterization_aa_mode(dynamic_raster_mode,
                                                 pipeline->line_mode);

      uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
      struct GENX(3DSTATE_RASTER) raster = {
         GENX(3DSTATE_RASTER_header),
         .APIMode = api_mode,
         .DXMultisampleRasterizationEnable = msaa_raster_enable,
         .AntialiasingEnable = aa_enable,
         .CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
         .FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
         .GlobalDepthOffsetEnableSolid = dyn->rs.depth_bias.enable,
         .GlobalDepthOffsetEnableWireframe = dyn->rs.depth_bias.enable,
         .GlobalDepthOffsetEnablePoint = dyn->rs.depth_bias.enable,
         .GlobalDepthOffsetConstant = dyn->rs.depth_bias.constant,
         .GlobalDepthOffsetScale = dyn->rs.depth_bias.slope,
         .GlobalDepthOffsetClamp = dyn->rs.depth_bias.clamp,
      };
      GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
      anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
                           pipeline->gfx8.raster);
   }

   /* Stencil reference values moved from COLOR_CALC_STATE in gfx8 to
    * 3DSTATE_WM_DEPTH_STENCIL in gfx9. That means the dirty bits gets split
    * across different state packets for gfx8 and gfx9. We handle that by
    * using a big old #if switch here.
    */
#if GFX_VER == 8
   /* gfx8: stencil references live in COLOR_CALC_STATE alongside the blend
    * constants, so both dirty bits funnel into the same packet.
    */
   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         .BlendConstantColorRed = dyn->cb.blend_constants[0],
         .BlendConstantColorGreen = dyn->cb.blend_constants[1],
         .BlendConstantColorBlue = dyn->cb.blend_constants[2],
         .BlendConstantColorAlpha = dyn->cb.blend_constants[3],
         .StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
         .BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
         ccp.ColorCalcStatePointer = cc_state.offset;
         ccp.ColorCalcStatePointerValid = true;
      }
   }

   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                       ANV_CMD_DIRTY_RENDER_TARGETS)) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
      /* Derive which aspects the current attachments actually have, so the
       * optimizer can drop tests/writes against missing aspects.
       */
      VkImageAspectFlags ds_aspects = 0;
      if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
         ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
      if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
         ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;

      struct vk_depth_stencil_state opt_ds = dyn->ds;
      vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
         ds.DoubleSidedStencilEnable = true;

         ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
         ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;

         ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
         ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;

         ds.DepthTestEnable = opt_ds.depth.test_enable;
         ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
         ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
         ds.StencilTestEnable = opt_ds.stencil.test_enable;
         ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
         ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
         ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
         ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
         ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
         ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
         ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
         ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
         ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
      }

      /* Depth/stencil state changed, so re-evaluate the gfx8 PMA fix. */
      const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
      genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
   }
#else
   /* gfx9+: COLOR_CALC_STATE carries only blend constants; stencil
    * references are programmed in 3DSTATE_WM_DEPTH_STENCIL below.
    */
   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         .BlendConstantColorRed = dyn->cb.blend_constants[0],
         .BlendConstantColorGreen = dyn->cb.blend_constants[1],
         .BlendConstantColorBlue = dyn->cb.blend_constants[2],
         .BlendConstantColorAlpha = dyn->cb.blend_constants[3],
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
         ccp.ColorCalcStatePointer = cc_state.offset;
         ccp.ColorCalcStatePointerValid = true;
      }
   }

   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                       ANV_CMD_DIRTY_RENDER_TARGETS)) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
      VkImageAspectFlags ds_aspects = 0;
      if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
         ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
      if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
         ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;

      struct vk_depth_stencil_state opt_ds = dyn->ds;
      vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
         ds.DoubleSidedStencilEnable = true;

         ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
         ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;

         ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
         ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;

         ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff;
         ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff;

         ds.DepthTestEnable = opt_ds.depth.test_enable;
         ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
         ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
         ds.StencilTestEnable = opt_ds.stencil.test_enable;
         ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
         ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
         ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
         ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
         ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
         ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
         ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
         ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
         ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
      }

      /* Depth/stencil state changed, so re-evaluate the gfx9 stencil PMA
       * optimization.
       */
      const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
      genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
   }
#endif

#if GFX_VER >= 12
   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
         db.DepthBoundsTestEnable = dyn->ds.depth.bounds_test.enable;
         db.DepthBoundsTestMinValue = dyn->ds.depth.bounds_test.min;
         db.DepthBoundsTestMaxValue = dyn->ds.depth.bounds_test.max;
      }
   }
#endif

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
         ls.LineStipplePattern = dyn->rs.line.stipple.pattern;
         /* Guard against a zero stipple factor; the hardware field is the
          * reciprocal of the repeat count.
          */
         ls.LineStippleInverseRepeatCount =
            1.0f / MAX2(1, dyn->rs.line.stipple.factor);
         ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor;
      }
   }

   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                       ANV_CMD_DIRTY_INDEX_BUFFER)) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
         vf.GeometryDistributionEnable = true;
#endif
         vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable;
         vf.CutIndex = cmd_buffer->state.gfx.restart_index;
      }
   }

   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDEX_BUFFER) {
      struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
      uint32_t offset = cmd_buffer->state.gfx.index_offset;
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
         ib.IndexFormat = cmd_buffer->state.gfx.index_type;
         ib.MOCS = anv_mocs(cmd_buffer->device,
                            buffer->address.bo,
                            ISL_SURF_USAGE_INDEX_BUFFER_BIT);
#if GFX_VER >= 12
         ib.L3BypassDisable = true;
#endif
         ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
         ib.BufferSize = vk_buffer_range(&buffer->vk, offset,
                                         VK_WHOLE_SIZE);
      }
   }

#if GFX_VERx10 >= 125
   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
         /* If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE*/
         vfg.DistributionMode =
            anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_STRICT :
                                                                      RR_FREE;
         vfg.DistributionGranularity = BatchLevelGranularity;
         /* Wa_14014890652 */
         if (intel_device_info_is_dg2(&cmd_buffer->device->info))
            vfg.GranularityThresholdDisable = 1;
         vfg.ListCutIndexEnable = dyn->ia.primitive_restart_enable;
         /* 192 vertices for TRILIST_ADJ */
         vfg.ListNBatchSizeScale = 0;
         /* Batch size of 384 vertices */
         vfg.List3BatchSizeScale = 2;
         /* Batch size of 128 vertices */
         vfg.List2BatchSizeScale = 1;
         /* Batch size of 128 vertices */
         vfg.List1BatchSizeScale = 2;
         /* Batch size of 256 vertices for STRIP topologies */
         vfg.StripBatchSizeScale = 3;
         /* 192 control points for PATCHLIST_3 */
         vfg.PatchBatchSizeScale = 1;
         /* 192 control points for PATCHLIST_3 */
         vfg.PatchBatchSizeMultiplier = 31;
      }
   }
#endif

   if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS))
      genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations);

   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
      /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders
       * threads.
       */
      uint32_t wm_dwords[GENX(3DSTATE_WM_length)];
      struct GENX(3DSTATE_WM) wm = {
         GENX(3DSTATE_WM_header),

         .ForceThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
                                      (pipeline->force_fragment_thread_dispatch ||
                                       anv_cmd_buffer_all_color_write_masked(cmd_buffer)) ?
                                      ForceON : 0,
      };
      GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm);

      anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm);
   }

   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
      const uint8_t color_writes = dyn->cb.color_write_enables;
      const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
      bool has_writeable_rt =
         anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
         (color_writes & ((1u << state->color_att_count) - 1)) != 0;

      /* 3DSTATE_PS_BLEND to be consistent with the rest of the
       * BLEND_STATE_ENTRY.
       */
      uint32_t ps_blend_dwords[GENX(3DSTATE_PS_BLEND_length)];
      struct GENX(3DSTATE_PS_BLEND) ps_blend = {
         GENX(3DSTATE_PS_BLEND_header),
         .HasWriteableRT = has_writeable_rt,
      };
      GENX(3DSTATE_PS_BLEND_pack)(NULL, ps_blend_dwords, &ps_blend);
      anv_batch_emit_merge(&cmd_buffer->batch, ps_blend_dwords,
                           pipeline->gfx8.ps_blend);

      uint32_t blend_dws[GENX(BLEND_STATE_length) +
                         MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
      uint32_t *dws = blend_dws;
      memset(blend_dws, 0, sizeof(blend_dws));

      /* Skip this part */
      dws += GENX(BLEND_STATE_length);

      for (uint32_t i = 0; i < MAX_RTS; i++) {
         /* Disable anything above the current number of color attachments. */
         bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
                               (color_writes & BITFIELD_BIT(i)) == 0;
         struct GENX(BLEND_STATE_ENTRY) entry = {
            .WriteDisableAlpha = write_disabled ||
                                 (pipeline->color_comp_writes[i] &
                                  VK_COLOR_COMPONENT_A_BIT) == 0,
            .WriteDisableRed = write_disabled ||
                               (pipeline->color_comp_writes[i] &
                                VK_COLOR_COMPONENT_R_BIT) == 0,
            .WriteDisableGreen = write_disabled ||
                                 (pipeline->color_comp_writes[i] &
                                  VK_COLOR_COMPONENT_G_BIT) == 0,
            .WriteDisableBlue = write_disabled ||
                                (pipeline->color_comp_writes[i] &
                                 VK_COLOR_COMPONENT_B_BIT) == 0,
            .LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
         };
         GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
         dws += GENX(BLEND_STATE_ENTRY_length);
      }

      uint32_t num_dwords = GENX(BLEND_STATE_length) +
                            GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;

      /* Merge the dynamic write-disables with the pipeline's baked
       * BLEND_STATE and emit a fresh pointer to the merged copy.
       */
      struct anv_state blend_states =
         anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
                                      pipeline->gfx8.blend_state, num_dwords, 64);
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
         bsp.BlendStatePointer = blend_states.offset;
         bsp.BlendStatePointerValid = true;
      }
   }

   /* When we're done, there is no more dirty gfx state. */
   vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
   cmd_buffer->state.gfx.dirty = 0;
}