1/*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include <assert.h>
25#include <stdbool.h>
26#include <string.h>
27#include <unistd.h>
28#include <fcntl.h>
29
30#include "anv_private.h"
31
32#include "genxml/gen_macros.h"
33#include "genxml/genX_pack.h"
34
/* Toggle the gfx8/gfx9 "PMA fix" for this command buffer by writing the
 * per-gen CACHE_MODE register via MI_LOAD_REGISTER_IMM, bracketed by the
 * PIPE_CONTROL flushes the hardware docs require.  Tracks the current
 * setting in cmd_buffer->state.pma_fix_enabled so redundant toggles are
 * elided.
 */
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
   /* Already in the requested state; the register write and its flushes
    * are expensive, so skip them.
    */
   if (cmd_buffer->state.pma_fix_enabled == enable)
      return;

   cmd_buffer->state.pma_fix_enabled = enable;

   /* According to the Broadwell PIPE_CONTROL documentation, software should
    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
    * prior to the LRI.  If stencil buffer writes are enabled, then a Render
    * Cache Flush is also necessary.
    *
    * The Skylake docs say to use a depth stall rather than a command
    * streamer stall.  However, the hardware seems to violently disagree.
    * A full command streamer stall seems to be needed in both cases.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DepthCacheFlushEnable = true;
      pc.CommandStreamerStallEnable = true;
      pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
      pc.TileCacheFlushEnable = true;

      /* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
       * be set with any PIPE_CONTROL with Depth Flush Enable bit set."
       */
      pc.DepthStallEnable = true;
#endif
   }

#if GFX_VER == 9

   /* On gfx9 the fix lives in CACHE_MODE_0::STC PMA Optimization Enable.
    * The *Mask field selects which bits of the masked register the write
    * actually updates.
    */
   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
                   .STCPMAOptimizationEnable = enable,
                   .STCPMAOptimizationEnableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset   = GENX(CACHE_MODE_0_num);
      lri.DataDWord        = cache_mode;
   }

#elif GFX_VER == 8

   /* On gfx8 the fix lives in CACHE_MODE_1::NP_PMA_FIX_ENABLE (plus the
    * related NP early-Z-fails disable), again written through the masked
    * register mechanism.
    */
   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
                   .NPPMAFixEnable = enable,
                   .NPEarlyZFailsDisable = enable,
                   .NPPMAFixEnableMask = true,
                   .NPEarlyZFailsDisableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset   = GENX(CACHE_MODE_1_num);
      lri.DataDWord        = cache_mode;
   }

#endif /* GFX_VER == 8 */

   /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
    * Flush bits is often necessary.  We do it regardless because it's easier.
    * The render cache flush is also necessary if stencil writes are enabled.
    *
    * Again, the Skylake docs give a different set of flushes but the BDW
    * flushes seem to work just as well.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DepthStallEnable = true;
      pc.DepthCacheFlushEnable = true;
      pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
      pc.TileCacheFlushEnable = true;
#endif
   }
}
108
109UNUSED static bool
110want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
111                   const struct vk_depth_stencil_state *ds)
112{
113   assert(GFX_VER == 8);
114
115   /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
116    *
117    *    SW must set this bit in order to enable this fix when following
118    *    expression is TRUE.
119    *
120    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
121    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
122    *    (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
123    *    (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
124    *    !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
125    *    (3DSTATE_PS_EXTRA::PixelShaderValid) &&
126    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
127    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
128    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
129    *      3DSTATE_WM_HZ_OP::StencilBufferClear) &&
130    *    (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
131    *    (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
132    *       3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
133    *       3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
134    *       3DSTATE_PS_BLEND::AlphaTestEnable ||
135    *       3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
136    *      3DSTATE_WM::ForceKillPix != ForceOff &&
137    *      ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
138    *        3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
139    *       (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
140    *        3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
141    *        3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
142    *     (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
143    */
144
145   /* These are always true:
146    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
147    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
148    */
149
150   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
151    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
152    * and there is no harm.
153    *
154    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
155    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
156    */
157   if (!cmd_buffer->state.hiz_enabled)
158      return false;
159
160   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
161   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
162   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
163      return false;
164
165   /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
166   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
167   if (wm_prog_data->early_fragment_tests)
168      return false;
169
170   /* We never use anv_pipeline for HiZ ops so this is trivially true:
171    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
172    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
173    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
174    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
175    */
176
177   /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
178   if (!ds->depth.test_enable)
179      return false;
180
181   /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
182    *    3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
183    *    3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
184    *    3DSTATE_PS_BLEND::AlphaTestEnable ||
185    *    3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
186    *   3DSTATE_WM::ForceKillPix != ForceOff &&
187    *   ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
188    *     3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
189    *    (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
190    *     3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
191    *     3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
192    *  (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
193    */
194   return (pipeline->kill_pixel && (ds->depth.write_enable ||
195                                    ds->stencil.write_enable)) ||
196          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
197}
198
199UNUSED static bool
200want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
201                     const struct vk_depth_stencil_state *ds)
202{
203   if (GFX_VER > 9)
204      return false;
205   assert(GFX_VER == 9);
206
207   /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
208    *
209    *    Clearing this bit will force the STC cache to wait for pending
210    *    retirement of pixels at the HZ-read stage and do the STC-test for
211    *    Non-promoted, R-computed and Computed depth modes instead of
212    *    postponing the STC-test to RCPFE.
213    *
214    *    STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
215    *                  3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
216    *
217    *    STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
218    *                   (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
219    *                    3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
220    *
221    *    COMP_STC_EN = STC_TEST_EN &&
222    *                  3DSTATE_PS_EXTRA::PixelShaderComputesStencil
223    *
224    *    SW parses the pipeline states to generate the following logical
225    *    signal indicating if PMA FIX can be enabled.
226    *
227    *    STC_PMA_OPT =
228    *       3DSTATE_WM::ForceThreadDispatch != 1 &&
229    *       !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
230    *       3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
231    *       3DSTATE_DEPTH_BUFFER::HIZ Enable &&
232    *       !(3DSTATE_WM::EDSC_Mode == 2) &&
233    *       3DSTATE_PS_EXTRA::PixelShaderValid &&
234    *       !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
235    *         3DSTATE_WM_HZ_OP::DepthBufferResolve ||
236    *         3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
237    *         3DSTATE_WM_HZ_OP::StencilBufferClear) &&
238    *       (COMP_STC_EN || STC_WRITE_EN) &&
239    *       ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
240    *         3DSTATE_WM::ForceKillPix == ON ||
241    *         3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
242    *         3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
243    *         3DSTATE_PS_BLEND::AlphaTestEnable ||
244    *         3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
245    *        (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
246    */
247
248   /* These are always true:
249    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
250    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
251    */
252
253   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
254    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
255    * and there is no harm.
256    *
257    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
258    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
259    */
260   if (!cmd_buffer->state.hiz_enabled)
261      return false;
262
263   /* We can't possibly know if HiZ is enabled without the depth attachment */
264   ASSERTED const struct anv_image_view *d_iview =
265      cmd_buffer->state.gfx.depth_att.iview;
266   assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
267
268   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
269   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
270   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
271      return false;
272
273   /* !(3DSTATE_WM::EDSC_Mode == 2) */
274   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
275   if (wm_prog_data->early_fragment_tests)
276      return false;
277
278   /* We never use anv_pipeline for HiZ ops so this is trivially true:
279   *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
280    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
281    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
282    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
283    */
284
285   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
286    * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
287    */
288   const bool stc_test_en = ds->stencil.test_enable;
289
290   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
291    * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
292    *  3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
293    */
294   const bool stc_write_en = ds->stencil.write_enable;
295
296   /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
297   const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
298
299   /* COMP_STC_EN || STC_WRITE_EN */
300   if (!(comp_stc_en || stc_write_en))
301      return false;
302
303   /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
304    *  3DSTATE_WM::ForceKillPix == ON ||
305    *  3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
306    *  3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
307    *  3DSTATE_PS_BLEND::AlphaTestEnable ||
308    *  3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
309    * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
310    */
311   return pipeline->kill_pixel ||
312          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
313}
314
315void
316genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
317{
318   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
319   const struct vk_dynamic_graphics_state *dyn =
320      &cmd_buffer->vk.dynamic_graphics_state;
321
322#if GFX_VER >= 11
323   if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
324       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
325      genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr);
326#endif /* GFX_VER >= 11 */
327
328   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
329       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
330      uint32_t sf_dw[GENX(3DSTATE_SF_length)];
331      struct GENX(3DSTATE_SF) sf = {
332         GENX(3DSTATE_SF_header),
333      };
334#if GFX_VER == 8
335      if (cmd_buffer->device->info.platform == INTEL_PLATFORM_CHV) {
336         sf.CHVLineWidth = dyn->rs.line.width;
337      } else {
338         sf.LineWidth = dyn->rs.line.width;
339      }
340#else
341      sf.LineWidth = dyn->rs.line.width,
342#endif
343      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
344      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
345   }
346
347   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
348       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
349       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
350       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
351       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
352       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
353      /* Take dynamic primitive topology in to account with
354       *    3DSTATE_RASTER::APIMode
355       *    3DSTATE_RASTER::DXMultisampleRasterizationEnable
356       *    3DSTATE_RASTER::AntialiasingEnable
357       */
358      uint32_t api_mode = 0;
359      bool msaa_raster_enable = false;
360
361      VkPolygonMode dynamic_raster_mode =
362         genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
363                                   dyn->ia.primitive_topology);
364
365      genX(rasterization_mode)(dynamic_raster_mode,
366                               pipeline->line_mode, dyn->rs.line.width,
367                               &api_mode, &msaa_raster_enable);
368
369      bool aa_enable = anv_rasterization_aa_mode(dynamic_raster_mode,
370                                                 pipeline->line_mode);
371
372      uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
373      struct GENX(3DSTATE_RASTER) raster = {
374         GENX(3DSTATE_RASTER_header),
375         .APIMode = api_mode,
376         .DXMultisampleRasterizationEnable = msaa_raster_enable,
377         .AntialiasingEnable = aa_enable,
378         .CullMode     = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
379         .FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
380         .GlobalDepthOffsetEnableSolid       = dyn->rs.depth_bias.enable,
381         .GlobalDepthOffsetEnableWireframe   = dyn->rs.depth_bias.enable,
382         .GlobalDepthOffsetEnablePoint       = dyn->rs.depth_bias.enable,
383         .GlobalDepthOffsetConstant          = dyn->rs.depth_bias.constant,
384         .GlobalDepthOffsetScale             = dyn->rs.depth_bias.slope,
385         .GlobalDepthOffsetClamp             = dyn->rs.depth_bias.clamp,
386      };
387      GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
388      anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
389                           pipeline->gfx8.raster);
390   }
391
392   /* Stencil reference values moved from COLOR_CALC_STATE in gfx8 to
393    * 3DSTATE_WM_DEPTH_STENCIL in gfx9. That means the dirty bits gets split
394    * across different state packets for gfx8 and gfx9. We handle that by
395    * using a big old #if switch here.
396    */
397#if GFX_VER == 8
398   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
399       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
400      struct anv_state cc_state =
401         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
402                                            GENX(COLOR_CALC_STATE_length) * 4,
403                                            64);
404      struct GENX(COLOR_CALC_STATE) cc = {
405         .BlendConstantColorRed = dyn->cb.blend_constants[0],
406         .BlendConstantColorGreen = dyn->cb.blend_constants[1],
407         .BlendConstantColorBlue = dyn->cb.blend_constants[2],
408         .BlendConstantColorAlpha = dyn->cb.blend_constants[3],
409         .StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
410         .BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
411      };
412      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
413
414      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
415         ccp.ColorCalcStatePointer        = cc_state.offset;
416         ccp.ColorCalcStatePointerValid   = true;
417      }
418   }
419
420   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
421                                       ANV_CMD_DIRTY_RENDER_TARGETS)) ||
422       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
423       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
424       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
425       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
426       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
427       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
428       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
429      VkImageAspectFlags ds_aspects = 0;
430      if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
431         ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
432      if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
433         ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
434
435      struct vk_depth_stencil_state opt_ds = dyn->ds;
436      vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
437
438      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
439         ds.DoubleSidedStencilEnable = true;
440
441         ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
442         ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
443
444         ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
445         ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
446
447         ds.DepthTestEnable = opt_ds.depth.test_enable;
448         ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
449         ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
450         ds.StencilTestEnable = opt_ds.stencil.test_enable;
451         ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
452         ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
453         ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
454         ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
455         ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
456         ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
457         ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
458         ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
459         ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
460      }
461
462      const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
463      genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
464   }
465#else
466   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
467      struct anv_state cc_state =
468         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
469                                            GENX(COLOR_CALC_STATE_length) * 4,
470                                            64);
471      struct GENX(COLOR_CALC_STATE) cc = {
472         .BlendConstantColorRed = dyn->cb.blend_constants[0],
473         .BlendConstantColorGreen = dyn->cb.blend_constants[1],
474         .BlendConstantColorBlue = dyn->cb.blend_constants[2],
475         .BlendConstantColorAlpha = dyn->cb.blend_constants[3],
476      };
477      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
478
479      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
480         ccp.ColorCalcStatePointer = cc_state.offset;
481         ccp.ColorCalcStatePointerValid = true;
482      }
483   }
484
485   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
486                                       ANV_CMD_DIRTY_RENDER_TARGETS)) ||
487       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
488       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
489       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
490       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
491       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
492       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
493       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
494       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
495      VkImageAspectFlags ds_aspects = 0;
496      if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
497         ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
498      if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
499         ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
500
501      struct vk_depth_stencil_state opt_ds = dyn->ds;
502      vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
503
504      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
505         ds.DoubleSidedStencilEnable = true;
506
507         ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
508         ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;
509
510         ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
511         ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;
512
513         ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff;
514         ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff;
515
516         ds.DepthTestEnable = opt_ds.depth.test_enable;
517         ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
518         ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
519         ds.StencilTestEnable = opt_ds.stencil.test_enable;
520         ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
521         ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
522         ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
523         ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
524         ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
525         ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
526         ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
527         ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
528         ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
529      }
530
531      const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
532      genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
533   }
534#endif
535
536#if GFX_VER >= 12
537   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
538       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
539      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
540         db.DepthBoundsTestEnable = dyn->ds.depth.bounds_test.enable;
541         db.DepthBoundsTestMinValue = dyn->ds.depth.bounds_test.min;
542         db.DepthBoundsTestMaxValue = dyn->ds.depth.bounds_test.max;
543      }
544   }
545#endif
546
547   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
548      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
549         ls.LineStipplePattern = dyn->rs.line.stipple.pattern;
550         ls.LineStippleInverseRepeatCount =
551            1.0f / MAX2(1, dyn->rs.line.stipple.factor);
552         ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor;
553      }
554   }
555
556   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
557                                       ANV_CMD_DIRTY_INDEX_BUFFER)) ||
558       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
559      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
560#if GFX_VERx10 >= 125
561         vf.GeometryDistributionEnable = true;
562#endif
563         vf.IndexedDrawCutIndexEnable  = dyn->ia.primitive_restart_enable;
564         vf.CutIndex                   = cmd_buffer->state.gfx.restart_index;
565      }
566   }
567
568   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDEX_BUFFER) {
569      struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
570      uint32_t offset = cmd_buffer->state.gfx.index_offset;
571      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
572         ib.IndexFormat           = cmd_buffer->state.gfx.index_type;
573         ib.MOCS                  = anv_mocs(cmd_buffer->device,
574                                             buffer->address.bo,
575                                             ISL_SURF_USAGE_INDEX_BUFFER_BIT);
576#if GFX_VER >= 12
577         ib.L3BypassDisable       = true;
578#endif
579         ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
580         ib.BufferSize            = vk_buffer_range(&buffer->vk, offset,
581                                                    VK_WHOLE_SIZE);
582      }
583   }
584
585#if GFX_VERx10 >= 125
586   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
587       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
588      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
589         /* If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE*/
590         vfg.DistributionMode =
591            anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_STRICT :
592                                                                      RR_FREE;
593         vfg.DistributionGranularity = BatchLevelGranularity;
594         /* Wa_14014890652 */
595         if (intel_device_info_is_dg2(&cmd_buffer->device->info))
596            vfg.GranularityThresholdDisable = 1;
597         vfg.ListCutIndexEnable = dyn->ia.primitive_restart_enable;
598         /* 192 vertices for TRILIST_ADJ */
599         vfg.ListNBatchSizeScale = 0;
600         /* Batch size of 384 vertices */
601         vfg.List3BatchSizeScale = 2;
602         /* Batch size of 128 vertices */
603         vfg.List2BatchSizeScale = 1;
604         /* Batch size of 128 vertices */
605         vfg.List1BatchSizeScale = 2;
606         /* Batch size of 256 vertices for STRIP topologies */
607         vfg.StripBatchSizeScale = 3;
608         /* 192 control points for PATCHLIST_3 */
609         vfg.PatchBatchSizeScale = 1;
610         /* 192 control points for PATCHLIST_3 */
611         vfg.PatchBatchSizeMultiplier = 31;
612      }
613   }
614#endif
615
616   if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
617       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS))
618      genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations);
619
620   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
621       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
622      /* 3DSTATE_WM in the hope we can avoid spawning fragment shaders
623       * threads.
624       */
625      uint32_t wm_dwords[GENX(3DSTATE_WM_length)];
626      struct GENX(3DSTATE_WM) wm = {
627         GENX(3DSTATE_WM_header),
628
629         .ForceThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
630                                      (pipeline->force_fragment_thread_dispatch ||
631                                       anv_cmd_buffer_all_color_write_masked(cmd_buffer)) ?
632                                      ForceON : 0,
633      };
634      GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm);
635
636      anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm);
637   }
638
639   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
640       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
641       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
642      const uint8_t color_writes = dyn->cb.color_write_enables;
643      const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
644      bool has_writeable_rt =
645         anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
646         (color_writes & ((1u << state->color_att_count) - 1)) != 0;
647
648      /* 3DSTATE_PS_BLEND to be consistent with the rest of the
649       * BLEND_STATE_ENTRY.
650       */
651      uint32_t ps_blend_dwords[GENX(3DSTATE_PS_BLEND_length)];
652      struct GENX(3DSTATE_PS_BLEND) ps_blend = {
653         GENX(3DSTATE_PS_BLEND_header),
654         .HasWriteableRT = has_writeable_rt,
655      };
656      GENX(3DSTATE_PS_BLEND_pack)(NULL, ps_blend_dwords, &ps_blend);
657      anv_batch_emit_merge(&cmd_buffer->batch, ps_blend_dwords,
658                           pipeline->gfx8.ps_blend);
659
660      uint32_t blend_dws[GENX(BLEND_STATE_length) +
661                         MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
662      uint32_t *dws = blend_dws;
663      memset(blend_dws, 0, sizeof(blend_dws));
664
665      /* Skip this part */
666      dws += GENX(BLEND_STATE_length);
667
668      for (uint32_t i = 0; i < MAX_RTS; i++) {
669         /* Disable anything above the current number of color attachments. */
670         bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
671                               (color_writes & BITFIELD_BIT(i)) == 0;
672         struct GENX(BLEND_STATE_ENTRY) entry = {
673            .WriteDisableAlpha = write_disabled ||
674                                 (pipeline->color_comp_writes[i] &
675                                  VK_COLOR_COMPONENT_A_BIT) == 0,
676            .WriteDisableRed   = write_disabled ||
677                                 (pipeline->color_comp_writes[i] &
678                                  VK_COLOR_COMPONENT_R_BIT) == 0,
679            .WriteDisableGreen = write_disabled ||
680                                 (pipeline->color_comp_writes[i] &
681                                  VK_COLOR_COMPONENT_G_BIT) == 0,
682            .WriteDisableBlue  = write_disabled ||
683                                 (pipeline->color_comp_writes[i] &
684                                  VK_COLOR_COMPONENT_B_BIT) == 0,
685            .LogicOpFunction   = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
686         };
687         GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
688         dws += GENX(BLEND_STATE_ENTRY_length);
689      }
690
691      uint32_t num_dwords = GENX(BLEND_STATE_length) +
692         GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
693
694      struct anv_state blend_states =
695         anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
696                                      pipeline->gfx8.blend_state, num_dwords, 64);
697      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
698         bsp.BlendStatePointer      = blend_states.offset;
699         bsp.BlendStatePointerValid = true;
700      }
701   }
702
703   /* When we're done, there is no more dirty gfx state. */
704   vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
705   cmd_buffer->state.gfx.dirty = 0;
706}
707