1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2012 Advanced Micro Devices, Inc. 3bf215546Sopenharmony_ci * All Rights Reserved. 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 6bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 7bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 8bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 9bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 10bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 11bf215546Sopenharmony_ci * 12bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 13bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 14bf215546Sopenharmony_ci * Software. 15bf215546Sopenharmony_ci * 16bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 
23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "si_build_pm4.h" 26bf215546Sopenharmony_ci#include "si_query.h" 27bf215546Sopenharmony_ci#include "si_shader_internal.h" 28bf215546Sopenharmony_ci#include "sid.h" 29bf215546Sopenharmony_ci#include "util/fast_idiv_by_const.h" 30bf215546Sopenharmony_ci#include "util/format/u_format.h" 31bf215546Sopenharmony_ci#include "util/format/u_format_s3tc.h" 32bf215546Sopenharmony_ci#include "util/u_dual_blend.h" 33bf215546Sopenharmony_ci#include "util/u_helpers.h" 34bf215546Sopenharmony_ci#include "util/u_memory.h" 35bf215546Sopenharmony_ci#include "util/u_resource.h" 36bf215546Sopenharmony_ci#include "util/u_upload_mgr.h" 37bf215546Sopenharmony_ci#include "util/u_blend.h" 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci#include "gfx10_format_table.h" 40bf215546Sopenharmony_ci 41bf215546Sopenharmony_cistatic unsigned si_map_swizzle(unsigned swizzle) 42bf215546Sopenharmony_ci{ 43bf215546Sopenharmony_ci switch (swizzle) { 44bf215546Sopenharmony_ci case PIPE_SWIZZLE_Y: 45bf215546Sopenharmony_ci return V_008F0C_SQ_SEL_Y; 46bf215546Sopenharmony_ci case PIPE_SWIZZLE_Z: 47bf215546Sopenharmony_ci return V_008F0C_SQ_SEL_Z; 48bf215546Sopenharmony_ci case PIPE_SWIZZLE_W: 49bf215546Sopenharmony_ci return V_008F0C_SQ_SEL_W; 50bf215546Sopenharmony_ci case PIPE_SWIZZLE_0: 51bf215546Sopenharmony_ci return V_008F0C_SQ_SEL_0; 52bf215546Sopenharmony_ci case PIPE_SWIZZLE_1: 53bf215546Sopenharmony_ci return V_008F0C_SQ_SEL_1; 54bf215546Sopenharmony_ci default: /* PIPE_SWIZZLE_X */ 55bf215546Sopenharmony_ci return V_008F0C_SQ_SEL_X; 56bf215546Sopenharmony_ci } 57bf215546Sopenharmony_ci} 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci/* 12.4 fixed-point */ 60bf215546Sopenharmony_cistatic unsigned si_pack_float_12p4(float x) 61bf215546Sopenharmony_ci{ 62bf215546Sopenharmony_ci return x <= 0 ? 0 : x >= 4096 ? 
0xffff : x * 16; 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci/* 66bf215546Sopenharmony_ci * Inferred framebuffer and blender state. 67bf215546Sopenharmony_ci * 68bf215546Sopenharmony_ci * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 69bf215546Sopenharmony_ci * if there is not enough PS outputs. 70bf215546Sopenharmony_ci */ 71bf215546Sopenharmony_cistatic void si_emit_cb_render_state(struct si_context *sctx) 72bf215546Sopenharmony_ci{ 73bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 74bf215546Sopenharmony_ci struct si_state_blend *blend = sctx->queued.named.blend; 75bf215546Sopenharmony_ci /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, 76bf215546Sopenharmony_ci * but you never know. */ 77bf215546Sopenharmony_ci uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_mask; 78bf215546Sopenharmony_ci unsigned i; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci /* Avoid a hang that happens when dual source blending is enabled 81bf215546Sopenharmony_ci * but there is not enough color outputs. This is undefined behavior, 82bf215546Sopenharmony_ci * so disable color writes completely. 83bf215546Sopenharmony_ci * 84bf215546Sopenharmony_ci * Reproducible with Unigine Heaven 4.0 and drirc missing. 85bf215546Sopenharmony_ci */ 86bf215546Sopenharmony_ci if (blend->dual_src_blend && sctx->shader.ps.cso && 87bf215546Sopenharmony_ci (sctx->shader.ps.cso->info.colors_written & 0x3) != 0x3) 88bf215546Sopenharmony_ci cb_target_mask = 0; 89bf215546Sopenharmony_ci 90bf215546Sopenharmony_ci /* GFX9: Flush DFSM when CB_TARGET_MASK changes. 91bf215546Sopenharmony_ci * I think we don't have to do anything between IBs. 
92bf215546Sopenharmony_ci */ 93bf215546Sopenharmony_ci if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask) { 94bf215546Sopenharmony_ci sctx->last_cb_target_mask = cb_target_mask; 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci radeon_begin(cs); 97bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 98bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 99bf215546Sopenharmony_ci radeon_end(); 100bf215546Sopenharmony_ci } 101bf215546Sopenharmony_ci 102bf215546Sopenharmony_ci radeon_begin(cs); 103bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, 104bf215546Sopenharmony_ci cb_target_mask); 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX8) { 107bf215546Sopenharmony_ci /* DCC MSAA workaround. 108bf215546Sopenharmony_ci * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- 109bf215546Sopenharmony_ci * COMBINER_DISABLE, but that would be more complicated. 110bf215546Sopenharmony_ci */ 111bf215546Sopenharmony_ci bool oc_disable = 112bf215546Sopenharmony_ci blend->dcc_msaa_corruption_4bit & cb_target_mask && sctx->framebuffer.nr_samples >= 2; 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 115bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_028424_CB_FDCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, 116bf215546Sopenharmony_ci S_028424_SAMPLE_MASK_TRACKER_DISABLE(oc_disable) | 117bf215546Sopenharmony_ci S_028424_SAMPLE_MASK_TRACKER_WATERMARK(15)); 118bf215546Sopenharmony_ci } else { 119bf215546Sopenharmony_ci radeon_opt_set_context_reg( 120bf215546Sopenharmony_ci sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, 121bf215546Sopenharmony_ci S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(sctx->gfx_level <= GFX9) | 122bf215546Sopenharmony_ci S_028424_OVERWRITE_COMBINER_WATERMARK(sctx->gfx_level >= GFX10 ? 
6 : 4) | 123bf215546Sopenharmony_ci S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) | 124bf215546Sopenharmony_ci S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->gfx_level < GFX11 && 125bf215546Sopenharmony_ci sctx->screen->info.has_dcc_constant_encode)); 126bf215546Sopenharmony_ci } 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci /* RB+ register settings. */ 130bf215546Sopenharmony_ci if (sctx->screen->info.rbplus_allowed) { 131bf215546Sopenharmony_ci unsigned spi_shader_col_format = 132bf215546Sopenharmony_ci sctx->shader.ps.cso ? sctx->shader.ps.current->key.ps.part.epilog.spi_shader_col_format 133bf215546Sopenharmony_ci : 0; 134bf215546Sopenharmony_ci unsigned sx_ps_downconvert = 0; 135bf215546Sopenharmony_ci unsigned sx_blend_opt_epsilon = 0; 136bf215546Sopenharmony_ci unsigned sx_blend_opt_control = 0; 137bf215546Sopenharmony_ci 138bf215546Sopenharmony_ci for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 139bf215546Sopenharmony_ci struct si_surface *surf = (struct si_surface *)sctx->framebuffer.state.cbufs[i]; 140bf215546Sopenharmony_ci unsigned format, swap, spi_format, colormask; 141bf215546Sopenharmony_ci bool has_alpha, has_rgb; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci if (!surf) { 144bf215546Sopenharmony_ci /* If the color buffer is not set, the driver sets 32_R 145bf215546Sopenharmony_ci * as the SPI color format, because the hw doesn't allow 146bf215546Sopenharmony_ci * holes between color outputs, so also set this to 147bf215546Sopenharmony_ci * enable RB+. 148bf215546Sopenharmony_ci */ 149bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 150bf215546Sopenharmony_ci continue; 151bf215546Sopenharmony_ci } 152bf215546Sopenharmony_ci 153bf215546Sopenharmony_ci format = sctx->gfx_level >= GFX11 ? 
G_028C70_FORMAT_GFX11(surf->cb_color_info): 154bf215546Sopenharmony_ci G_028C70_FORMAT_GFX6(surf->cb_color_info); 155bf215546Sopenharmony_ci swap = G_028C70_COMP_SWAP(surf->cb_color_info); 156bf215546Sopenharmony_ci spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 157bf215546Sopenharmony_ci colormask = (cb_target_mask >> (i * 4)) & 0xf; 158bf215546Sopenharmony_ci 159bf215546Sopenharmony_ci /* Set if RGB and A are present. */ 160bf215546Sopenharmony_ci has_alpha = !(sctx->gfx_level >= GFX11 ? G_028C74_FORCE_DST_ALPHA_1_GFX11(surf->cb_color_attrib): 161bf215546Sopenharmony_ci G_028C74_FORCE_DST_ALPHA_1_GFX6(surf->cb_color_attrib)); 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_ci if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || 164bf215546Sopenharmony_ci format == V_028C70_COLOR_32) 165bf215546Sopenharmony_ci has_rgb = !has_alpha; 166bf215546Sopenharmony_ci else 167bf215546Sopenharmony_ci has_rgb = true; 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci /* Check the colormask and export format. */ 170bf215546Sopenharmony_ci if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 171bf215546Sopenharmony_ci has_rgb = false; 172bf215546Sopenharmony_ci if (!(colormask & PIPE_MASK_A)) 173bf215546Sopenharmony_ci has_alpha = false; 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_ZERO) { 176bf215546Sopenharmony_ci has_rgb = false; 177bf215546Sopenharmony_ci has_alpha = false; 178bf215546Sopenharmony_ci } 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci /* Disable value checking for disabled channels. */ 181bf215546Sopenharmony_ci if (!has_rgb) 182bf215546Sopenharmony_ci sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 183bf215546Sopenharmony_ci if (!has_alpha) 184bf215546Sopenharmony_ci sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci /* Enable down-conversion for 32bpp and smaller formats. 
*/ 187bf215546Sopenharmony_ci switch (format) { 188bf215546Sopenharmony_ci case V_028C70_COLOR_8: 189bf215546Sopenharmony_ci case V_028C70_COLOR_8_8: 190bf215546Sopenharmony_ci case V_028C70_COLOR_8_8_8_8: 191bf215546Sopenharmony_ci /* For 1 and 2-channel formats, use the superset thereof. */ 192bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 193bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 194bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 195bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 196bf215546Sopenharmony_ci sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 197bf215546Sopenharmony_ci } 198bf215546Sopenharmony_ci break; 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci case V_028C70_COLOR_5_6_5: 201bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 202bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 203bf215546Sopenharmony_ci sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci break; 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci case V_028C70_COLOR_1_5_5_5: 208bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 209bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 210bf215546Sopenharmony_ci sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 211bf215546Sopenharmony_ci } 212bf215546Sopenharmony_ci break; 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci case V_028C70_COLOR_4_4_4_4: 215bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 216bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 217bf215546Sopenharmony_ci sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci break; 220bf215546Sopenharmony_ci 
221bf215546Sopenharmony_ci case V_028C70_COLOR_32: 222bf215546Sopenharmony_ci if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R) 223bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 224bf215546Sopenharmony_ci else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR) 225bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 226bf215546Sopenharmony_ci break; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci case V_028C70_COLOR_16: 229bf215546Sopenharmony_ci case V_028C70_COLOR_16_16: 230bf215546Sopenharmony_ci /* For 1-channel formats, use the superset thereof. */ 231bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 232bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 233bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 234bf215546Sopenharmony_ci spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 235bf215546Sopenharmony_ci if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV) 236bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 237bf215546Sopenharmony_ci else 238bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 239bf215546Sopenharmony_ci } 240bf215546Sopenharmony_ci break; 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci case V_028C70_COLOR_10_11_11: 243bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 244bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 245bf215546Sopenharmony_ci break; 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci case V_028C70_COLOR_2_10_10_10: 248bf215546Sopenharmony_ci case V_028C70_COLOR_10_10_10_2: 249bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 250bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 251bf215546Sopenharmony_ci 
sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 252bf215546Sopenharmony_ci } 253bf215546Sopenharmony_ci break; 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci case V_028C70_COLOR_5_9_9_9: 256bf215546Sopenharmony_ci if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 257bf215546Sopenharmony_ci sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); 258bf215546Sopenharmony_ci break; 259bf215546Sopenharmony_ci } 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci /* If there are no color outputs, the first color export is 263bf215546Sopenharmony_ci * always enabled as 32_R, so also set this to enable RB+. 264bf215546Sopenharmony_ci */ 265bf215546Sopenharmony_ci if (!sx_ps_downconvert) 266bf215546Sopenharmony_ci sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R; 267bf215546Sopenharmony_ci 268bf215546Sopenharmony_ci /* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */ 269bf215546Sopenharmony_ci radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT, 270bf215546Sopenharmony_ci sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control); 271bf215546Sopenharmony_ci } 272bf215546Sopenharmony_ci radeon_end_update_context_roll(sctx); 273bf215546Sopenharmony_ci} 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci/* 276bf215546Sopenharmony_ci * Blender functions 277bf215546Sopenharmony_ci */ 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_cistatic uint32_t si_translate_blend_function(int blend_func) 280bf215546Sopenharmony_ci{ 281bf215546Sopenharmony_ci switch (blend_func) { 282bf215546Sopenharmony_ci case PIPE_BLEND_ADD: 283bf215546Sopenharmony_ci return V_028780_COMB_DST_PLUS_SRC; 284bf215546Sopenharmony_ci case PIPE_BLEND_SUBTRACT: 285bf215546Sopenharmony_ci return V_028780_COMB_SRC_MINUS_DST; 286bf215546Sopenharmony_ci case PIPE_BLEND_REVERSE_SUBTRACT: 287bf215546Sopenharmony_ci return V_028780_COMB_DST_MINUS_SRC; 288bf215546Sopenharmony_ci case PIPE_BLEND_MIN: 
289bf215546Sopenharmony_ci return V_028780_COMB_MIN_DST_SRC; 290bf215546Sopenharmony_ci case PIPE_BLEND_MAX: 291bf215546Sopenharmony_ci return V_028780_COMB_MAX_DST_SRC; 292bf215546Sopenharmony_ci default: 293bf215546Sopenharmony_ci PRINT_ERR("Unknown blend function %d\n", blend_func); 294bf215546Sopenharmony_ci assert(0); 295bf215546Sopenharmony_ci break; 296bf215546Sopenharmony_ci } 297bf215546Sopenharmony_ci return 0; 298bf215546Sopenharmony_ci} 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_cistatic uint32_t si_translate_blend_factor(enum amd_gfx_level gfx_level, int blend_fact) 301bf215546Sopenharmony_ci{ 302bf215546Sopenharmony_ci switch (blend_fact) { 303bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_ONE: 304bf215546Sopenharmony_ci return V_028780_BLEND_ONE; 305bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_SRC_COLOR: 306bf215546Sopenharmony_ci return V_028780_BLEND_SRC_COLOR; 307bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_SRC_ALPHA: 308bf215546Sopenharmony_ci return V_028780_BLEND_SRC_ALPHA; 309bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_DST_ALPHA: 310bf215546Sopenharmony_ci return V_028780_BLEND_DST_ALPHA; 311bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_DST_COLOR: 312bf215546Sopenharmony_ci return V_028780_BLEND_DST_COLOR; 313bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 314bf215546Sopenharmony_ci return V_028780_BLEND_SRC_ALPHA_SATURATE; 315bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_CONST_COLOR: 316bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11: 317bf215546Sopenharmony_ci V_028780_BLEND_CONSTANT_COLOR_GFX6; 318bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_CONST_ALPHA: 319bf215546Sopenharmony_ci return gfx_level >= GFX11 ? 
V_028780_BLEND_CONSTANT_ALPHA_GFX11 : 320bf215546Sopenharmony_ci V_028780_BLEND_CONSTANT_ALPHA_GFX6; 321bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_ZERO: 322bf215546Sopenharmony_ci return V_028780_BLEND_ZERO; 323bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_SRC_COLOR: 324bf215546Sopenharmony_ci return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 325bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 326bf215546Sopenharmony_ci return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 327bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_DST_ALPHA: 328bf215546Sopenharmony_ci return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 329bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_DST_COLOR: 330bf215546Sopenharmony_ci return V_028780_BLEND_ONE_MINUS_DST_COLOR; 331bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_CONST_COLOR: 332bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11: 333bf215546Sopenharmony_ci V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6; 334bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 335bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11: 336bf215546Sopenharmony_ci V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6; 337bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_SRC1_COLOR: 338bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_COLOR_GFX11: 339bf215546Sopenharmony_ci V_028780_BLEND_SRC1_COLOR_GFX6; 340bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_SRC1_ALPHA: 341bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_ALPHA_GFX11: 342bf215546Sopenharmony_ci V_028780_BLEND_SRC1_ALPHA_GFX6; 343bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 344bf215546Sopenharmony_ci return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11: 345bf215546Sopenharmony_ci V_028780_BLEND_INV_SRC1_COLOR_GFX6; 346bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 347bf215546Sopenharmony_ci return gfx_level >= GFX11 ? 
V_028780_BLEND_INV_SRC1_ALPHA_GFX11: 348bf215546Sopenharmony_ci V_028780_BLEND_INV_SRC1_ALPHA_GFX6; 349bf215546Sopenharmony_ci default: 350bf215546Sopenharmony_ci PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact); 351bf215546Sopenharmony_ci assert(0); 352bf215546Sopenharmony_ci break; 353bf215546Sopenharmony_ci } 354bf215546Sopenharmony_ci return 0; 355bf215546Sopenharmony_ci} 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_cistatic uint32_t si_translate_blend_opt_function(int blend_func) 358bf215546Sopenharmony_ci{ 359bf215546Sopenharmony_ci switch (blend_func) { 360bf215546Sopenharmony_ci case PIPE_BLEND_ADD: 361bf215546Sopenharmony_ci return V_028760_OPT_COMB_ADD; 362bf215546Sopenharmony_ci case PIPE_BLEND_SUBTRACT: 363bf215546Sopenharmony_ci return V_028760_OPT_COMB_SUBTRACT; 364bf215546Sopenharmony_ci case PIPE_BLEND_REVERSE_SUBTRACT: 365bf215546Sopenharmony_ci return V_028760_OPT_COMB_REVSUBTRACT; 366bf215546Sopenharmony_ci case PIPE_BLEND_MIN: 367bf215546Sopenharmony_ci return V_028760_OPT_COMB_MIN; 368bf215546Sopenharmony_ci case PIPE_BLEND_MAX: 369bf215546Sopenharmony_ci return V_028760_OPT_COMB_MAX; 370bf215546Sopenharmony_ci default: 371bf215546Sopenharmony_ci return V_028760_OPT_COMB_BLEND_DISABLED; 372bf215546Sopenharmony_ci } 373bf215546Sopenharmony_ci} 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_cistatic uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 376bf215546Sopenharmony_ci{ 377bf215546Sopenharmony_ci switch (blend_fact) { 378bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_ZERO: 379bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 380bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_ONE: 381bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 382bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_SRC_COLOR: 383bf215546Sopenharmony_ci return is_alpha ? 
V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 384bf215546Sopenharmony_ci : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 385bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_SRC_COLOR: 386bf215546Sopenharmony_ci return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 387bf215546Sopenharmony_ci : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 388bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_SRC_ALPHA: 389bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 390bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 391bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 392bf215546Sopenharmony_ci case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 393bf215546Sopenharmony_ci return is_alpha ? V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 394bf215546Sopenharmony_ci : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 395bf215546Sopenharmony_ci default: 396bf215546Sopenharmony_ci return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci} 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_cistatic void si_blend_check_commutativity(struct si_screen *sscreen, struct si_state_blend *blend, 401bf215546Sopenharmony_ci enum pipe_blend_func func, enum pipe_blendfactor src, 402bf215546Sopenharmony_ci enum pipe_blendfactor dst, unsigned chanmask) 403bf215546Sopenharmony_ci{ 404bf215546Sopenharmony_ci /* Src factor is allowed when it does not depend on Dst */ 405bf215546Sopenharmony_ci static const uint32_t src_allowed = 406bf215546Sopenharmony_ci (1u << PIPE_BLENDFACTOR_ONE) | (1u << PIPE_BLENDFACTOR_SRC_COLOR) | 407bf215546Sopenharmony_ci (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) | 408bf215546Sopenharmony_ci (1u << PIPE_BLENDFACTOR_CONST_COLOR) | (1u << PIPE_BLENDFACTOR_CONST_ALPHA) | 409bf215546Sopenharmony_ci (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) | 410bf215546Sopenharmony_ci (1u << PIPE_BLENDFACTOR_ZERO) | (1u << 
PIPE_BLENDFACTOR_INV_SRC_COLOR) | 411bf215546Sopenharmony_ci (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) | 412bf215546Sopenharmony_ci (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) | 413bf215546Sopenharmony_ci (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci if (dst == PIPE_BLENDFACTOR_ONE && (src_allowed & (1u << src)) && 416bf215546Sopenharmony_ci (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN)) 417bf215546Sopenharmony_ci blend->commutative_4bit |= chanmask; 418bf215546Sopenharmony_ci} 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci/** 421bf215546Sopenharmony_ci * Get rid of DST in the blend factors by commuting the operands: 422bf215546Sopenharmony_ci * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 423bf215546Sopenharmony_ci */ 424bf215546Sopenharmony_cistatic void si_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor, 425bf215546Sopenharmony_ci unsigned expected_dst, unsigned replacement_src) 426bf215546Sopenharmony_ci{ 427bf215546Sopenharmony_ci if (*src_factor == expected_dst && *dst_factor == PIPE_BLENDFACTOR_ZERO) { 428bf215546Sopenharmony_ci *src_factor = PIPE_BLENDFACTOR_ZERO; 429bf215546Sopenharmony_ci *dst_factor = replacement_src; 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci /* Commuting the operands requires reversing subtractions. 
*/ 432bf215546Sopenharmony_ci if (*func == PIPE_BLEND_SUBTRACT) 433bf215546Sopenharmony_ci *func = PIPE_BLEND_REVERSE_SUBTRACT; 434bf215546Sopenharmony_ci else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 435bf215546Sopenharmony_ci *func = PIPE_BLEND_SUBTRACT; 436bf215546Sopenharmony_ci } 437bf215546Sopenharmony_ci} 438bf215546Sopenharmony_ci 439bf215546Sopenharmony_cistatic void *si_create_blend_state_mode(struct pipe_context *ctx, 440bf215546Sopenharmony_ci const struct pipe_blend_state *state, unsigned mode) 441bf215546Sopenharmony_ci{ 442bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 443bf215546Sopenharmony_ci struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 444bf215546Sopenharmony_ci struct si_pm4_state *pm4 = &blend->pm4; 445bf215546Sopenharmony_ci uint32_t sx_mrt_blend_opt[8] = {0}; 446bf215546Sopenharmony_ci uint32_t color_control = 0; 447bf215546Sopenharmony_ci bool logicop_enable = state->logicop_enable && state->logicop_func != PIPE_LOGICOP_COPY; 448bf215546Sopenharmony_ci 449bf215546Sopenharmony_ci if (!blend) 450bf215546Sopenharmony_ci return NULL; 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci blend->alpha_to_coverage = state->alpha_to_coverage; 453bf215546Sopenharmony_ci blend->alpha_to_one = state->alpha_to_one; 454bf215546Sopenharmony_ci blend->dual_src_blend = util_blend_state_is_dual(state, 0); 455bf215546Sopenharmony_ci blend->logicop_enable = logicop_enable; 456bf215546Sopenharmony_ci blend->allows_noop_optimization = 457bf215546Sopenharmony_ci state->rt[0].rgb_func == PIPE_BLEND_ADD && 458bf215546Sopenharmony_ci state->rt[0].alpha_func == PIPE_BLEND_ADD && 459bf215546Sopenharmony_ci state->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_DST_COLOR && 460bf215546Sopenharmony_ci state->rt[0].alpha_src_factor == PIPE_BLENDFACTOR_DST_COLOR && 461bf215546Sopenharmony_ci state->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ZERO && 462bf215546Sopenharmony_ci state->rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_ZERO && 
463bf215546Sopenharmony_ci mode == V_028808_CB_NORMAL; 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci unsigned num_shader_outputs = state->max_rt + 1; /* estimate */ 466bf215546Sopenharmony_ci if (blend->dual_src_blend) 467bf215546Sopenharmony_ci num_shader_outputs = MAX2(num_shader_outputs, 2); 468bf215546Sopenharmony_ci 469bf215546Sopenharmony_ci if (logicop_enable) { 470bf215546Sopenharmony_ci color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 471bf215546Sopenharmony_ci } else { 472bf215546Sopenharmony_ci color_control |= S_028808_ROP3(0xcc); 473bf215546Sopenharmony_ci } 474bf215546Sopenharmony_ci 475bf215546Sopenharmony_ci if (state->alpha_to_coverage && state->alpha_to_coverage_dither) { 476bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 477bf215546Sopenharmony_ci S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 478bf215546Sopenharmony_ci S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) | 479bf215546Sopenharmony_ci S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 480bf215546Sopenharmony_ci S_028B70_OFFSET_ROUND(1)); 481bf215546Sopenharmony_ci } else { 482bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 483bf215546Sopenharmony_ci S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 484bf215546Sopenharmony_ci S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 485bf215546Sopenharmony_ci S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 486bf215546Sopenharmony_ci S_028B70_OFFSET_ROUND(0)); 487bf215546Sopenharmony_ci } 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci if (state->alpha_to_coverage) 490bf215546Sopenharmony_ci blend->need_src_alpha_4bit |= 0xf; 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci blend->cb_target_mask = 0; 493bf215546Sopenharmony_ci blend->cb_target_enabled_4bit = 0; 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci unsigned last_blend_cntl; 
496bf215546Sopenharmony_ci 497bf215546Sopenharmony_ci for (int i = 0; i < num_shader_outputs; i++) { 498bf215546Sopenharmony_ci /* state->rt entries > 0 only written if independent blending */ 499bf215546Sopenharmony_ci const int j = state->independent_blend_enable ? i : 0; 500bf215546Sopenharmony_ci 501bf215546Sopenharmony_ci unsigned eqRGB = state->rt[j].rgb_func; 502bf215546Sopenharmony_ci unsigned srcRGB = state->rt[j].rgb_src_factor; 503bf215546Sopenharmony_ci unsigned dstRGB = state->rt[j].rgb_dst_factor; 504bf215546Sopenharmony_ci unsigned eqA = state->rt[j].alpha_func; 505bf215546Sopenharmony_ci unsigned srcA = state->rt[j].alpha_src_factor; 506bf215546Sopenharmony_ci unsigned dstA = state->rt[j].alpha_dst_factor; 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_ci unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 509bf215546Sopenharmony_ci unsigned blend_cntl = 0; 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 512bf215546Sopenharmony_ci S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 513bf215546Sopenharmony_ci 514bf215546Sopenharmony_ci /* Only set dual source blending for MRT0 to avoid a hang. */ 515bf215546Sopenharmony_ci if (i >= 1 && blend->dual_src_blend) { 516bf215546Sopenharmony_ci if (i == 1) { 517bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) 518bf215546Sopenharmony_ci blend_cntl = last_blend_cntl; 519bf215546Sopenharmony_ci else 520bf215546Sopenharmony_ci blend_cntl = S_028780_ENABLE(1); 521bf215546Sopenharmony_ci } 522bf215546Sopenharmony_ci 523bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 524bf215546Sopenharmony_ci continue; 525bf215546Sopenharmony_ci } 526bf215546Sopenharmony_ci 527bf215546Sopenharmony_ci /* Only addition and subtraction equations are supported with 528bf215546Sopenharmony_ci * dual source blending. 
529bf215546Sopenharmony_ci */ 530bf215546Sopenharmony_ci if (blend->dual_src_blend && (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 531bf215546Sopenharmony_ci eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 532bf215546Sopenharmony_ci assert(!"Unsupported equation for dual source blending"); 533bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 534bf215546Sopenharmony_ci continue; 535bf215546Sopenharmony_ci } 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci /* cb_render_state will disable unused ones */ 538bf215546Sopenharmony_ci blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 539bf215546Sopenharmony_ci if (state->rt[j].colormask) 540bf215546Sopenharmony_ci blend->cb_target_enabled_4bit |= 0xf << (4 * i); 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 543bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 544bf215546Sopenharmony_ci continue; 545bf215546Sopenharmony_ci } 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci si_blend_check_commutativity(sctx->screen, blend, eqRGB, srcRGB, dstRGB, 0x7 << (4 * i)); 548bf215546Sopenharmony_ci si_blend_check_commutativity(sctx->screen, blend, eqA, srcA, dstA, 0x8 << (4 * i)); 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci /* Blending optimizations for RB+. 551bf215546Sopenharmony_ci * These transformations don't change the behavior. 
552bf215546Sopenharmony_ci * 553bf215546Sopenharmony_ci * First, get rid of DST in the blend factors: 554bf215546Sopenharmony_ci * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 555bf215546Sopenharmony_ci */ 556bf215546Sopenharmony_ci si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, PIPE_BLENDFACTOR_DST_COLOR, 557bf215546Sopenharmony_ci PIPE_BLENDFACTOR_SRC_COLOR); 558bf215546Sopenharmony_ci si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_COLOR, 559bf215546Sopenharmony_ci PIPE_BLENDFACTOR_SRC_COLOR); 560bf215546Sopenharmony_ci si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_ALPHA, 561bf215546Sopenharmony_ci PIPE_BLENDFACTOR_SRC_ALPHA); 562bf215546Sopenharmony_ci 563bf215546Sopenharmony_ci /* Look up the ideal settings from tables. */ 564bf215546Sopenharmony_ci srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 565bf215546Sopenharmony_ci dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 566bf215546Sopenharmony_ci srcA_opt = si_translate_blend_opt_factor(srcA, true); 567bf215546Sopenharmony_ci dstA_opt = si_translate_blend_opt_factor(dstA, true); 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci /* Handle interdependencies. */ 570bf215546Sopenharmony_ci if (util_blend_factor_uses_dest(srcRGB, false)) 571bf215546Sopenharmony_ci dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 572bf215546Sopenharmony_ci if (util_blend_factor_uses_dest(srcA, false)) 573bf215546Sopenharmony_ci dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 574bf215546Sopenharmony_ci 575bf215546Sopenharmony_ci if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 576bf215546Sopenharmony_ci (dstRGB == PIPE_BLENDFACTOR_ZERO || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 577bf215546Sopenharmony_ci dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 578bf215546Sopenharmony_ci dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 579bf215546Sopenharmony_ci 580bf215546Sopenharmony_ci /* Set the final value. 
*/ 581bf215546Sopenharmony_ci sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) | 582bf215546Sopenharmony_ci S_028760_COLOR_DST_OPT(dstRGB_opt) | 583bf215546Sopenharmony_ci S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 584bf215546Sopenharmony_ci S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) | 585bf215546Sopenharmony_ci S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci /* Set blend state. */ 588bf215546Sopenharmony_ci blend_cntl |= S_028780_ENABLE(1); 589bf215546Sopenharmony_ci blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 590bf215546Sopenharmony_ci blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(sctx->gfx_level, srcRGB)); 591bf215546Sopenharmony_ci blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(sctx->gfx_level, dstRGB)); 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 594bf215546Sopenharmony_ci blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 595bf215546Sopenharmony_ci blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 596bf215546Sopenharmony_ci blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(sctx->gfx_level, srcA)); 597bf215546Sopenharmony_ci blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(sctx->gfx_level, dstA)); 598bf215546Sopenharmony_ci } 599bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 600bf215546Sopenharmony_ci last_blend_cntl = blend_cntl; 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci blend->blend_enable_4bit |= 0xfu << (i * 4); 603bf215546Sopenharmony_ci 604bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX8 && sctx->gfx_level <= GFX10) 605bf215546Sopenharmony_ci blend->dcc_msaa_corruption_4bit |= 0xfu << (i * 4); 606bf215546Sopenharmony_ci 607bf215546Sopenharmony_ci /* This is only important for formats without 
alpha. */ 608bf215546Sopenharmony_ci if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 609bf215546Sopenharmony_ci srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 610bf215546Sopenharmony_ci dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 611bf215546Sopenharmony_ci srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 612bf215546Sopenharmony_ci blend->need_src_alpha_4bit |= 0xfu << (i * 4); 613bf215546Sopenharmony_ci } 614bf215546Sopenharmony_ci 615bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX8 && sctx->gfx_level <= GFX10 && logicop_enable) 616bf215546Sopenharmony_ci blend->dcc_msaa_corruption_4bit |= blend->cb_target_enabled_4bit; 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci if (blend->cb_target_mask) { 619bf215546Sopenharmony_ci color_control |= S_028808_MODE(mode); 620bf215546Sopenharmony_ci } else { 621bf215546Sopenharmony_ci color_control |= S_028808_MODE(V_028808_CB_DISABLE); 622bf215546Sopenharmony_ci } 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci if (sctx->screen->info.rbplus_allowed) { 625bf215546Sopenharmony_ci /* Disable RB+ blend optimizations for dual source blending. 626bf215546Sopenharmony_ci * Vulkan does this. 627bf215546Sopenharmony_ci */ 628bf215546Sopenharmony_ci if (blend->dual_src_blend) { 629bf215546Sopenharmony_ci for (int i = 0; i < num_shader_outputs; i++) { 630bf215546Sopenharmony_ci sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 631bf215546Sopenharmony_ci S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 632bf215546Sopenharmony_ci } 633bf215546Sopenharmony_ci } 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci for (int i = 0; i < num_shader_outputs; i++) 636bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]); 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. 
*/ 639bf215546Sopenharmony_ci if (blend->dual_src_blend || logicop_enable || mode == V_028808_CB_RESOLVE) 640bf215546Sopenharmony_ci color_control |= S_028808_DISABLE_DUAL_QUAD(1); 641bf215546Sopenharmony_ci } 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 644bf215546Sopenharmony_ci return blend; 645bf215546Sopenharmony_ci} 646bf215546Sopenharmony_ci 647bf215546Sopenharmony_cistatic void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) 648bf215546Sopenharmony_ci{ 649bf215546Sopenharmony_ci return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 650bf215546Sopenharmony_ci} 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_cistatic bool si_check_blend_dst_sampler_noop(struct si_context *sctx) 653bf215546Sopenharmony_ci{ 654bf215546Sopenharmony_ci if (sctx->framebuffer.state.nr_cbufs == 1) { 655bf215546Sopenharmony_ci struct si_shader_selector *sel = sctx->shader.ps.cso; 656bf215546Sopenharmony_ci 657bf215546Sopenharmony_ci if (unlikely(sel->info.writes_1_if_tex_is_1 == 0xff)) { 658bf215546Sopenharmony_ci /* Wait for the shader to be ready. */ 659bf215546Sopenharmony_ci util_queue_fence_wait(&sel->ready); 660bf215546Sopenharmony_ci assert(sel->nir_binary); 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci struct nir_shader *nir = si_deserialize_shader(sel); 663bf215546Sopenharmony_ci 664bf215546Sopenharmony_ci /* Determine if this fragment shader always writes vec4(1) if a specific texture 665bf215546Sopenharmony_ci * is all 1s. 
666bf215546Sopenharmony_ci */ 667bf215546Sopenharmony_ci float in[4] = { 1.0, 1.0, 1.0, 1.0 }; 668bf215546Sopenharmony_ci float out[4]; 669bf215546Sopenharmony_ci int texunit; 670bf215546Sopenharmony_ci if (si_nir_is_output_const_if_tex_is_const(nir, in, out, &texunit) && 671bf215546Sopenharmony_ci !memcmp(in, out, 4 * sizeof(float))) { 672bf215546Sopenharmony_ci sel->info.writes_1_if_tex_is_1 = 1 + texunit; 673bf215546Sopenharmony_ci } else { 674bf215546Sopenharmony_ci sel->info.writes_1_if_tex_is_1 = 0; 675bf215546Sopenharmony_ci } 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci ralloc_free(nir); 678bf215546Sopenharmony_ci } 679bf215546Sopenharmony_ci 680bf215546Sopenharmony_ci if (sel->info.writes_1_if_tex_is_1 && 681bf215546Sopenharmony_ci sel->info.writes_1_if_tex_is_1 != 0xff) { 682bf215546Sopenharmony_ci /* Now check if the texture is cleared to 1 */ 683bf215546Sopenharmony_ci int unit = sctx->shader.ps.cso->info.writes_1_if_tex_is_1 - 1; 684bf215546Sopenharmony_ci struct si_samplers *samp = &sctx->samplers[PIPE_SHADER_FRAGMENT]; 685bf215546Sopenharmony_ci if ((1u << unit) & samp->enabled_mask) { 686bf215546Sopenharmony_ci struct si_texture* tex = (struct si_texture*) samp->views[unit]->texture; 687bf215546Sopenharmony_ci if (tex->is_depth && 688bf215546Sopenharmony_ci tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) && 689bf215546Sopenharmony_ci tex->depth_clear_value[0] == 1) { 690bf215546Sopenharmony_ci return false; 691bf215546Sopenharmony_ci } 692bf215546Sopenharmony_ci /* TODO: handle color textures */ 693bf215546Sopenharmony_ci } 694bf215546Sopenharmony_ci } 695bf215546Sopenharmony_ci } 696bf215546Sopenharmony_ci 697bf215546Sopenharmony_ci return true; 698bf215546Sopenharmony_ci} 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_cistatic void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx, 701bf215546Sopenharmony_ci const struct pipe_draw_info *info, 702bf215546Sopenharmony_ci unsigned drawid_offset, 
703bf215546Sopenharmony_ci const struct pipe_draw_indirect_info *indirect, 704bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draws, 705bf215546Sopenharmony_ci unsigned num_draws) { 706bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci if (!si_check_blend_dst_sampler_noop(sctx)) 709bf215546Sopenharmony_ci return; 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws); 712bf215546Sopenharmony_ci} 713bf215546Sopenharmony_ci 714bf215546Sopenharmony_cistatic void si_draw_vstate_blend_dst_sampler_noop(struct pipe_context *ctx, 715bf215546Sopenharmony_ci struct pipe_vertex_state *state, 716bf215546Sopenharmony_ci uint32_t partial_velem_mask, 717bf215546Sopenharmony_ci struct pipe_draw_vertex_state_info info, 718bf215546Sopenharmony_ci const struct pipe_draw_start_count_bias *draws, 719bf215546Sopenharmony_ci unsigned num_draws) { 720bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci if (!si_check_blend_dst_sampler_noop(sctx)) 723bf215546Sopenharmony_ci return; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci sctx->real_draw_vertex_state(ctx, state, partial_velem_mask, info, draws, num_draws); 726bf215546Sopenharmony_ci} 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_cistatic void si_bind_blend_state(struct pipe_context *ctx, void *state) 729bf215546Sopenharmony_ci{ 730bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 731bf215546Sopenharmony_ci struct si_state_blend *old_blend = sctx->queued.named.blend; 732bf215546Sopenharmony_ci struct si_state_blend *blend = (struct si_state_blend *)state; 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ci if (!blend) 735bf215546Sopenharmony_ci blend = (struct si_state_blend *)sctx->noop_blend; 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci 
si_pm4_bind_state(sctx, blend, blend); 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci if (old_blend->cb_target_mask != blend->cb_target_mask || 740bf215546Sopenharmony_ci old_blend->dual_src_blend != blend->dual_src_blend || 741bf215546Sopenharmony_ci (old_blend->dcc_msaa_corruption_4bit != blend->dcc_msaa_corruption_4bit && 742bf215546Sopenharmony_ci sctx->framebuffer.has_dcc_msaa)) 743bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_ci if (sctx->screen->info.has_export_conflict_bug && 746bf215546Sopenharmony_ci old_blend->blend_enable_4bit != blend->blend_enable_4bit) 747bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci if (old_blend->cb_target_mask != blend->cb_target_mask || 750bf215546Sopenharmony_ci old_blend->alpha_to_coverage != blend->alpha_to_coverage || 751bf215546Sopenharmony_ci old_blend->alpha_to_one != blend->alpha_to_one || 752bf215546Sopenharmony_ci old_blend->dual_src_blend != blend->dual_src_blend || 753bf215546Sopenharmony_ci old_blend->blend_enable_4bit != blend->blend_enable_4bit || 754bf215546Sopenharmony_ci old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit) { 755bf215546Sopenharmony_ci si_ps_key_update_framebuffer_blend(sctx); 756bf215546Sopenharmony_ci si_ps_key_update_blend_rasterizer(sctx); 757bf215546Sopenharmony_ci si_update_ps_inputs_read_or_disabled(sctx); 758bf215546Sopenharmony_ci sctx->do_update_shaders = true; 759bf215546Sopenharmony_ci } 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci if (sctx->screen->dpbb_allowed && 762bf215546Sopenharmony_ci (old_blend->alpha_to_coverage != blend->alpha_to_coverage || 763bf215546Sopenharmony_ci old_blend->blend_enable_4bit != blend->blend_enable_4bit || 764bf215546Sopenharmony_ci old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit)) 765bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, 
&sctx->atoms.s.dpbb_state); 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci if (sctx->screen->has_out_of_order_rast && 768bf215546Sopenharmony_ci ((old_blend->blend_enable_4bit != blend->blend_enable_4bit || 769bf215546Sopenharmony_ci old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit || 770bf215546Sopenharmony_ci old_blend->commutative_4bit != blend->commutative_4bit || 771bf215546Sopenharmony_ci old_blend->logicop_enable != blend->logicop_enable))) 772bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_ci if (likely(!radeon_uses_secure_bos(sctx->ws))) { 775bf215546Sopenharmony_ci if (unlikely(blend->allows_noop_optimization)) { 776bf215546Sopenharmony_ci si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop, 777bf215546Sopenharmony_ci si_draw_vstate_blend_dst_sampler_noop); 778bf215546Sopenharmony_ci } else { 779bf215546Sopenharmony_ci si_install_draw_wrapper(sctx, NULL, NULL); 780bf215546Sopenharmony_ci } 781bf215546Sopenharmony_ci } 782bf215546Sopenharmony_ci} 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_cistatic void si_delete_blend_state(struct pipe_context *ctx, void *state) 785bf215546Sopenharmony_ci{ 786bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci if (sctx->queued.named.blend == state) 789bf215546Sopenharmony_ci si_bind_blend_state(ctx, sctx->noop_blend); 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(blend)); 792bf215546Sopenharmony_ci} 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_cistatic void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) 795bf215546Sopenharmony_ci{ 796bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 797bf215546Sopenharmony_ci static const struct pipe_blend_color zeros; 798bf215546Sopenharmony_ci 
799bf215546Sopenharmony_ci sctx->blend_color = *state; 800bf215546Sopenharmony_ci sctx->blend_color_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 801bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color); 802bf215546Sopenharmony_ci} 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_cistatic void si_emit_blend_color(struct si_context *sctx) 805bf215546Sopenharmony_ci{ 806bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci radeon_begin(cs); 809bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028414_CB_BLEND_RED, 4); 810bf215546Sopenharmony_ci radeon_emit_array((uint32_t *)sctx->blend_color.color, 4); 811bf215546Sopenharmony_ci radeon_end(); 812bf215546Sopenharmony_ci} 813bf215546Sopenharmony_ci 814bf215546Sopenharmony_ci/* 815bf215546Sopenharmony_ci * Clipping 816bf215546Sopenharmony_ci */ 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_cistatic void si_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) 819bf215546Sopenharmony_ci{ 820bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 821bf215546Sopenharmony_ci struct pipe_constant_buffer cb; 822bf215546Sopenharmony_ci static const struct pipe_clip_state zeros; 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci if (memcmp(&sctx->clip_state, state, sizeof(*state)) == 0) 825bf215546Sopenharmony_ci return; 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci sctx->clip_state = *state; 828bf215546Sopenharmony_ci sctx->clip_state_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 829bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state); 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_ci cb.buffer = NULL; 832bf215546Sopenharmony_ci cb.user_buffer = state->ucp; 833bf215546Sopenharmony_ci cb.buffer_offset = 0; 834bf215546Sopenharmony_ci cb.buffer_size = 4 * 4 * 8; 835bf215546Sopenharmony_ci si_set_internal_const_buffer(sctx, 
SI_VS_CONST_CLIP_PLANES, &cb); 836bf215546Sopenharmony_ci} 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_cistatic void si_emit_clip_state(struct si_context *sctx) 839bf215546Sopenharmony_ci{ 840bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci radeon_begin(cs); 843bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_0285BC_PA_CL_UCP_0_X, 6 * 4); 844bf215546Sopenharmony_ci radeon_emit_array((uint32_t *)sctx->clip_state.ucp, 6 * 4); 845bf215546Sopenharmony_ci radeon_end(); 846bf215546Sopenharmony_ci} 847bf215546Sopenharmony_ci 848bf215546Sopenharmony_cistatic void si_emit_clip_regs(struct si_context *sctx) 849bf215546Sopenharmony_ci{ 850bf215546Sopenharmony_ci struct si_shader *vs = si_get_vs(sctx)->current; 851bf215546Sopenharmony_ci struct si_shader_selector *vs_sel = vs->selector; 852bf215546Sopenharmony_ci struct si_shader_info *info = &vs_sel->info; 853bf215546Sopenharmony_ci struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 854bf215546Sopenharmony_ci bool window_space = vs_sel->stage == MESA_SHADER_VERTEX ? 855bf215546Sopenharmony_ci info->base.vs.window_space_position : 0; 856bf215546Sopenharmony_ci unsigned clipdist_mask = vs_sel->info.clipdist_mask; 857bf215546Sopenharmony_ci unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SI_USER_CLIP_PLANE_MASK; 858bf215546Sopenharmony_ci unsigned culldist_mask = vs_sel->info.culldist_mask; 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci /* Clip distances on points have no effect, so need to be implemented 861bf215546Sopenharmony_ci * as cull distances. This applies for the clipvertex case as well. 862bf215546Sopenharmony_ci * 863bf215546Sopenharmony_ci * Setting this for primitives other than points should have no adverse 864bf215546Sopenharmony_ci * effects. 
865bf215546Sopenharmony_ci */ 866bf215546Sopenharmony_ci clipdist_mask &= rs->clip_plane_enable; 867bf215546Sopenharmony_ci culldist_mask |= clipdist_mask; 868bf215546Sopenharmony_ci 869bf215546Sopenharmony_ci unsigned pa_cl_cntl = S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->gfx_level >= GFX10_3 && 870bf215546Sopenharmony_ci !sctx->screen->options.vrs2x2) | 871bf215546Sopenharmony_ci S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->gfx_level >= GFX10_3) | 872bf215546Sopenharmony_ci clipdist_mask | (culldist_mask << 8); 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_ci radeon_begin(&sctx->gfx_cs); 875bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL, 876bf215546Sopenharmony_ci pa_cl_cntl | vs->pa_cl_vs_out_cntl); 877bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, 878bf215546Sopenharmony_ci rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space)); 879bf215546Sopenharmony_ci radeon_end_update_context_roll(sctx); 880bf215546Sopenharmony_ci} 881bf215546Sopenharmony_ci 882bf215546Sopenharmony_ci/* 883bf215546Sopenharmony_ci * inferred state between framebuffer and rasterizer 884bf215546Sopenharmony_ci */ 885bf215546Sopenharmony_cistatic void si_update_poly_offset_state(struct si_context *sctx) 886bf215546Sopenharmony_ci{ 887bf215546Sopenharmony_ci struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci if (!rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) { 890bf215546Sopenharmony_ci si_pm4_bind_state(sctx, poly_offset, NULL); 891bf215546Sopenharmony_ci return; 892bf215546Sopenharmony_ci } 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_ci /* Use the user format, not db_render_format, so that the polygon 895bf215546Sopenharmony_ci * offset behaves as expected by applications. 
896bf215546Sopenharmony_ci */ 897bf215546Sopenharmony_ci switch (sctx->framebuffer.state.zsbuf->texture->format) { 898bf215546Sopenharmony_ci case PIPE_FORMAT_Z16_UNORM: 899bf215546Sopenharmony_ci si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 900bf215546Sopenharmony_ci break; 901bf215546Sopenharmony_ci default: /* 24-bit */ 902bf215546Sopenharmony_ci si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 903bf215546Sopenharmony_ci break; 904bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT: 905bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 906bf215546Sopenharmony_ci si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 907bf215546Sopenharmony_ci break; 908bf215546Sopenharmony_ci } 909bf215546Sopenharmony_ci} 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci/* 912bf215546Sopenharmony_ci * Rasterizer 913bf215546Sopenharmony_ci */ 914bf215546Sopenharmony_ci 915bf215546Sopenharmony_cistatic uint32_t si_translate_fill(uint32_t func) 916bf215546Sopenharmony_ci{ 917bf215546Sopenharmony_ci switch (func) { 918bf215546Sopenharmony_ci case PIPE_POLYGON_MODE_FILL: 919bf215546Sopenharmony_ci return V_028814_X_DRAW_TRIANGLES; 920bf215546Sopenharmony_ci case PIPE_POLYGON_MODE_LINE: 921bf215546Sopenharmony_ci return V_028814_X_DRAW_LINES; 922bf215546Sopenharmony_ci case PIPE_POLYGON_MODE_POINT: 923bf215546Sopenharmony_ci return V_028814_X_DRAW_POINTS; 924bf215546Sopenharmony_ci default: 925bf215546Sopenharmony_ci assert(0); 926bf215546Sopenharmony_ci return V_028814_X_DRAW_POINTS; 927bf215546Sopenharmony_ci } 928bf215546Sopenharmony_ci} 929bf215546Sopenharmony_ci 930bf215546Sopenharmony_cistatic void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state) 931bf215546Sopenharmony_ci{ 932bf215546Sopenharmony_ci struct si_screen *sscreen = ((struct si_context *)ctx)->screen; 933bf215546Sopenharmony_ci struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 934bf215546Sopenharmony_ci 
struct si_pm4_state *pm4 = &rs->pm4; 935bf215546Sopenharmony_ci unsigned tmp, i; 936bf215546Sopenharmony_ci float psize_min, psize_max; 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_ci if (!rs) { 939bf215546Sopenharmony_ci return NULL; 940bf215546Sopenharmony_ci } 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci rs->scissor_enable = state->scissor; 943bf215546Sopenharmony_ci rs->clip_halfz = state->clip_halfz; 944bf215546Sopenharmony_ci rs->two_side = state->light_twoside; 945bf215546Sopenharmony_ci rs->multisample_enable = state->multisample; 946bf215546Sopenharmony_ci rs->force_persample_interp = state->force_persample_interp; 947bf215546Sopenharmony_ci rs->clip_plane_enable = state->clip_plane_enable; 948bf215546Sopenharmony_ci rs->half_pixel_center = state->half_pixel_center; 949bf215546Sopenharmony_ci rs->line_stipple_enable = state->line_stipple_enable; 950bf215546Sopenharmony_ci rs->poly_stipple_enable = state->poly_stipple_enable; 951bf215546Sopenharmony_ci rs->line_smooth = state->line_smooth; 952bf215546Sopenharmony_ci rs->line_width = state->line_width; 953bf215546Sopenharmony_ci rs->poly_smooth = state->poly_smooth; 954bf215546Sopenharmony_ci rs->point_smooth = state->point_smooth; 955bf215546Sopenharmony_ci rs->uses_poly_offset = state->offset_point || state->offset_line || state->offset_tri; 956bf215546Sopenharmony_ci rs->clamp_fragment_color = state->clamp_fragment_color; 957bf215546Sopenharmony_ci rs->clamp_vertex_color = state->clamp_vertex_color; 958bf215546Sopenharmony_ci rs->flatshade = state->flatshade; 959bf215546Sopenharmony_ci rs->flatshade_first = state->flatshade_first; 960bf215546Sopenharmony_ci rs->sprite_coord_enable = state->sprite_coord_enable; 961bf215546Sopenharmony_ci rs->rasterizer_discard = state->rasterizer_discard; 962bf215546Sopenharmony_ci rs->polygon_mode_is_lines = 963bf215546Sopenharmony_ci (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) || 964bf215546Sopenharmony_ci 
(state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK)); 965bf215546Sopenharmony_ci rs->polygon_mode_is_points = 966bf215546Sopenharmony_ci (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) || 967bf215546Sopenharmony_ci (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK)); 968bf215546Sopenharmony_ci rs->pa_sc_line_stipple = state->line_stipple_enable 969bf215546Sopenharmony_ci ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 970bf215546Sopenharmony_ci S_028A0C_REPEAT_COUNT(state->line_stipple_factor) 971bf215546Sopenharmony_ci : 0; 972bf215546Sopenharmony_ci /* TODO: implement line stippling with perpendicular end caps. */ 973bf215546Sopenharmony_ci /* Line width > 2 is an internal recommendation. */ 974bf215546Sopenharmony_ci rs->perpendicular_end_caps = state->multisample && 975bf215546Sopenharmony_ci state->line_width > 2 && !state->line_stipple_enable; 976bf215546Sopenharmony_ci 977bf215546Sopenharmony_ci rs->pa_cl_clip_cntl = S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 978bf215546Sopenharmony_ci S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) | 979bf215546Sopenharmony_ci S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) | 980bf215546Sopenharmony_ci S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 981bf215546Sopenharmony_ci S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 982bf215546Sopenharmony_ci 983bf215546Sopenharmony_ci rs->ngg_cull_flags_tris = SI_NGG_CULL_TRIANGLES | 984bf215546Sopenharmony_ci SI_NGG_CULL_CLIP_PLANE_ENABLE(state->clip_plane_enable); 985bf215546Sopenharmony_ci rs->ngg_cull_flags_tris_y_inverted = rs->ngg_cull_flags_tris; 986bf215546Sopenharmony_ci 987bf215546Sopenharmony_ci rs->ngg_cull_flags_lines = SI_NGG_CULL_LINES | 988bf215546Sopenharmony_ci (!rs->perpendicular_end_caps ? 
SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT : 0) | 989bf215546Sopenharmony_ci SI_NGG_CULL_CLIP_PLANE_ENABLE(state->clip_plane_enable); 990bf215546Sopenharmony_ci 991bf215546Sopenharmony_ci if (rs->rasterizer_discard) { 992bf215546Sopenharmony_ci rs->ngg_cull_flags_tris |= SI_NGG_CULL_FRONT_FACE | 993bf215546Sopenharmony_ci SI_NGG_CULL_BACK_FACE; 994bf215546Sopenharmony_ci rs->ngg_cull_flags_tris_y_inverted = rs->ngg_cull_flags_tris; 995bf215546Sopenharmony_ci } else { 996bf215546Sopenharmony_ci bool cull_front, cull_back; 997bf215546Sopenharmony_ci 998bf215546Sopenharmony_ci if (!state->front_ccw) { 999bf215546Sopenharmony_ci cull_front = !!(state->cull_face & PIPE_FACE_FRONT); 1000bf215546Sopenharmony_ci cull_back = !!(state->cull_face & PIPE_FACE_BACK); 1001bf215546Sopenharmony_ci } else { 1002bf215546Sopenharmony_ci cull_back = !!(state->cull_face & PIPE_FACE_FRONT); 1003bf215546Sopenharmony_ci cull_front = !!(state->cull_face & PIPE_FACE_BACK); 1004bf215546Sopenharmony_ci } 1005bf215546Sopenharmony_ci 1006bf215546Sopenharmony_ci if (cull_front) { 1007bf215546Sopenharmony_ci rs->ngg_cull_flags_tris |= SI_NGG_CULL_FRONT_FACE; 1008bf215546Sopenharmony_ci rs->ngg_cull_flags_tris_y_inverted |= SI_NGG_CULL_BACK_FACE; 1009bf215546Sopenharmony_ci } 1010bf215546Sopenharmony_ci 1011bf215546Sopenharmony_ci if (cull_back) { 1012bf215546Sopenharmony_ci rs->ngg_cull_flags_tris |= SI_NGG_CULL_BACK_FACE; 1013bf215546Sopenharmony_ci rs->ngg_cull_flags_tris_y_inverted |= SI_NGG_CULL_FRONT_FACE; 1014bf215546Sopenharmony_ci } 1015bf215546Sopenharmony_ci } 1016bf215546Sopenharmony_ci 1017bf215546Sopenharmony_ci si_pm4_set_reg( 1018bf215546Sopenharmony_ci pm4, R_0286D4_SPI_INTERP_CONTROL_0, 1019bf215546Sopenharmony_ci S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) | 1020bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 1021bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 
1022bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 1023bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 1024bf215546Sopenharmony_ci S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 1025bf215546Sopenharmony_ci 1026bf215546Sopenharmony_ci /* point size 12.4 fixed point */ 1027bf215546Sopenharmony_ci tmp = (unsigned)(state->point_size * 8.0); 1028bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 1029bf215546Sopenharmony_ci 1030bf215546Sopenharmony_ci if (state->point_size_per_vertex) { 1031bf215546Sopenharmony_ci psize_min = util_get_min_point_size(state); 1032bf215546Sopenharmony_ci psize_max = SI_MAX_POINT_SIZE; 1033bf215546Sopenharmony_ci } else { 1034bf215546Sopenharmony_ci /* Force the point size to be as if the vertex output was disabled. */ 1035bf215546Sopenharmony_ci psize_min = state->point_size; 1036bf215546Sopenharmony_ci psize_max = state->point_size; 1037bf215546Sopenharmony_ci } 1038bf215546Sopenharmony_ci rs->max_point_size = psize_max; 1039bf215546Sopenharmony_ci 1040bf215546Sopenharmony_ci /* Divide by two, because 0.5 = 1 pixel. 
*/ 1041bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 1042bf215546Sopenharmony_ci S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min / 2)) | 1043bf215546Sopenharmony_ci S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max / 2))); 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, 1046bf215546Sopenharmony_ci S_028A08_WIDTH(si_pack_float_12p4(state->line_width / 2))); 1047bf215546Sopenharmony_ci si_pm4_set_reg( 1048bf215546Sopenharmony_ci pm4, R_028A48_PA_SC_MODE_CNTL_0, 1049bf215546Sopenharmony_ci S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 1050bf215546Sopenharmony_ci S_028A48_MSAA_ENABLE(state->multisample || state->poly_smooth || state->line_smooth) | 1051bf215546Sopenharmony_ci S_028A48_VPORT_SCISSOR_ENABLE(1) | 1052bf215546Sopenharmony_ci S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.gfx_level >= GFX9)); 1053bf215546Sopenharmony_ci 1054bf215546Sopenharmony_ci bool polygon_mode_enabled = 1055bf215546Sopenharmony_ci (state->fill_front != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_FRONT)) || 1056bf215546Sopenharmony_ci (state->fill_back != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_BACK)); 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 1059bf215546Sopenharmony_ci S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 1060bf215546Sopenharmony_ci S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 1061bf215546Sopenharmony_ci S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 
1 : 0) | 1062bf215546Sopenharmony_ci S_028814_FACE(!state->front_ccw) | 1063bf215546Sopenharmony_ci S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 1064bf215546Sopenharmony_ci S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 1065bf215546Sopenharmony_ci S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 1066bf215546Sopenharmony_ci S_028814_POLY_MODE(polygon_mode_enabled) | 1067bf215546Sopenharmony_ci S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 1068bf215546Sopenharmony_ci S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)) | 1069bf215546Sopenharmony_ci /* this must be set if POLY_MODE or PERPENDICULAR_ENDCAP_ENA is set */ 1070bf215546Sopenharmony_ci S_028814_KEEP_TOGETHER_ENABLE(sscreen->info.gfx_level >= GFX10 ? 1071bf215546Sopenharmony_ci polygon_mode_enabled || 1072bf215546Sopenharmony_ci rs->perpendicular_end_caps : 0)); 1073bf215546Sopenharmony_ci 1074bf215546Sopenharmony_ci if (state->bottom_edge_rule) { 1075bf215546Sopenharmony_ci /* OpenGL windows should set this. */ 1076bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 1077bf215546Sopenharmony_ci S_028230_ER_TRI(0xA) | 1078bf215546Sopenharmony_ci S_028230_ER_POINT(0x5) | 1079bf215546Sopenharmony_ci S_028230_ER_RECT(0x9) | 1080bf215546Sopenharmony_ci S_028230_ER_LINE_LR(0x29) | 1081bf215546Sopenharmony_ci S_028230_ER_LINE_RL(0x29) | 1082bf215546Sopenharmony_ci S_028230_ER_LINE_TB(0xA) | 1083bf215546Sopenharmony_ci S_028230_ER_LINE_BT(0xA)); 1084bf215546Sopenharmony_ci } else { 1085bf215546Sopenharmony_ci /* OpenGL FBOs and Direct3D should set this. 
*/ 1086bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 1087bf215546Sopenharmony_ci S_028230_ER_TRI(0xA) | 1088bf215546Sopenharmony_ci S_028230_ER_POINT(0xA) | 1089bf215546Sopenharmony_ci S_028230_ER_RECT(0xA) | 1090bf215546Sopenharmony_ci S_028230_ER_LINE_LR(0x1A) | 1091bf215546Sopenharmony_ci S_028230_ER_LINE_RL(0x26) | 1092bf215546Sopenharmony_ci S_028230_ER_LINE_TB(0xA) | 1093bf215546Sopenharmony_ci S_028230_ER_LINE_BT(0xA)); 1094bf215546Sopenharmony_ci } 1095bf215546Sopenharmony_ci 1096bf215546Sopenharmony_ci if (!rs->uses_poly_offset) 1097bf215546Sopenharmony_ci return rs; 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_ci rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state)); 1100bf215546Sopenharmony_ci if (!rs->pm4_poly_offset) { 1101bf215546Sopenharmony_ci FREE(rs); 1102bf215546Sopenharmony_ci return NULL; 1103bf215546Sopenharmony_ci } 1104bf215546Sopenharmony_ci 1105bf215546Sopenharmony_ci /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. 
*/ 1106bf215546Sopenharmony_ci for (i = 0; i < 3; i++) { 1107bf215546Sopenharmony_ci struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 1108bf215546Sopenharmony_ci float offset_units = state->offset_units; 1109bf215546Sopenharmony_ci float offset_scale = state->offset_scale * 16.0f; 1110bf215546Sopenharmony_ci uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci if (!state->offset_units_unscaled) { 1113bf215546Sopenharmony_ci switch (i) { 1114bf215546Sopenharmony_ci case 0: /* 16-bit zbuffer */ 1115bf215546Sopenharmony_ci offset_units *= 4.0f; 1116bf215546Sopenharmony_ci pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 1117bf215546Sopenharmony_ci break; 1118bf215546Sopenharmony_ci case 1: /* 24-bit zbuffer */ 1119bf215546Sopenharmony_ci offset_units *= 2.0f; 1120bf215546Sopenharmony_ci pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 1121bf215546Sopenharmony_ci break; 1122bf215546Sopenharmony_ci case 2: /* 32-bit zbuffer */ 1123bf215546Sopenharmony_ci offset_units *= 1.0f; 1124bf215546Sopenharmony_ci pa_su_poly_offset_db_fmt_cntl = 1125bf215546Sopenharmony_ci S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 1126bf215546Sopenharmony_ci break; 1127bf215546Sopenharmony_ci } 1128bf215546Sopenharmony_ci } 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, pa_su_poly_offset_db_fmt_cntl); 1131bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 1132bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, fui(offset_scale)); 1133bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 1134bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale)); 1135bf215546Sopenharmony_ci si_pm4_set_reg(pm4, 
R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 1136bf215546Sopenharmony_ci } 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci return rs; 1139bf215546Sopenharmony_ci} 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_cistatic void si_bind_rs_state(struct pipe_context *ctx, void *state) 1142bf215546Sopenharmony_ci{ 1143bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1144bf215546Sopenharmony_ci struct si_state_rasterizer *old_rs = (struct si_state_rasterizer *)sctx->queued.named.rasterizer; 1145bf215546Sopenharmony_ci struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1146bf215546Sopenharmony_ci 1147bf215546Sopenharmony_ci if (!rs) 1148bf215546Sopenharmony_ci rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state; 1149bf215546Sopenharmony_ci 1150bf215546Sopenharmony_ci if (old_rs->multisample_enable != rs->multisample_enable) { 1151bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1152bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1153bf215546Sopenharmony_ci 1154bf215546Sopenharmony_ci /* Update the small primitive filter workaround if necessary. */ 1155bf215546Sopenharmony_ci if (sctx->screen->info.has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1) 1156bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 1157bf215546Sopenharmony_ci 1158bf215546Sopenharmony_ci /* NGG cull state uses multisample_enable. 
*/ 1159bf215546Sopenharmony_ci if (sctx->screen->use_ngg_culling) 1160bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 1161bf215546Sopenharmony_ci } 1162bf215546Sopenharmony_ci 1163bf215546Sopenharmony_ci if (old_rs->perpendicular_end_caps != rs->perpendicular_end_caps) 1164bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_ci if (sctx->screen->use_ngg_culling && 1167bf215546Sopenharmony_ci (old_rs->half_pixel_center != rs->half_pixel_center || 1168bf215546Sopenharmony_ci old_rs->line_width != rs->line_width)) 1169bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_ci SET_FIELD(sctx->current_vs_state, VS_STATE_CLAMP_VERTEX_COLOR, rs->clamp_vertex_color); 1172bf215546Sopenharmony_ci 1173bf215546Sopenharmony_ci si_pm4_bind_state(sctx, rasterizer, rs); 1174bf215546Sopenharmony_ci si_update_poly_offset_state(sctx); 1175bf215546Sopenharmony_ci 1176bf215546Sopenharmony_ci if (old_rs->scissor_enable != rs->scissor_enable) 1177bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors); 1178bf215546Sopenharmony_ci 1179bf215546Sopenharmony_ci if (old_rs->line_width != rs->line_width || old_rs->max_point_size != rs->max_point_size || 1180bf215546Sopenharmony_ci old_rs->half_pixel_center != rs->half_pixel_center) 1181bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband); 1182bf215546Sopenharmony_ci 1183bf215546Sopenharmony_ci if (old_rs->clip_halfz != rs->clip_halfz) 1184bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports); 1185bf215546Sopenharmony_ci 1186bf215546Sopenharmony_ci if (old_rs->clip_plane_enable != rs->clip_plane_enable || 1187bf215546Sopenharmony_ci old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl) 1188bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); 1189bf215546Sopenharmony_ci 
1190bf215546Sopenharmony_ci if (old_rs->sprite_coord_enable != rs->sprite_coord_enable || 1191bf215546Sopenharmony_ci old_rs->flatshade != rs->flatshade) 1192bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.spi_map); 1193bf215546Sopenharmony_ci 1194bf215546Sopenharmony_ci if (old_rs->clip_plane_enable != rs->clip_plane_enable || 1195bf215546Sopenharmony_ci old_rs->rasterizer_discard != rs->rasterizer_discard || 1196bf215546Sopenharmony_ci old_rs->sprite_coord_enable != rs->sprite_coord_enable || 1197bf215546Sopenharmony_ci old_rs->flatshade != rs->flatshade || old_rs->two_side != rs->two_side || 1198bf215546Sopenharmony_ci old_rs->multisample_enable != rs->multisample_enable || 1199bf215546Sopenharmony_ci old_rs->poly_stipple_enable != rs->poly_stipple_enable || 1200bf215546Sopenharmony_ci old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth || 1201bf215546Sopenharmony_ci old_rs->point_smooth != rs->point_smooth || 1202bf215546Sopenharmony_ci old_rs->clamp_fragment_color != rs->clamp_fragment_color || 1203bf215546Sopenharmony_ci old_rs->force_persample_interp != rs->force_persample_interp || 1204bf215546Sopenharmony_ci old_rs->polygon_mode_is_points != rs->polygon_mode_is_points) { 1205bf215546Sopenharmony_ci si_ps_key_update_blend_rasterizer(sctx); 1206bf215546Sopenharmony_ci si_ps_key_update_rasterizer(sctx); 1207bf215546Sopenharmony_ci si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 1208bf215546Sopenharmony_ci si_update_ps_inputs_read_or_disabled(sctx); 1209bf215546Sopenharmony_ci sctx->do_update_shaders = true; 1210bf215546Sopenharmony_ci } 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_ci if (old_rs->line_smooth != rs->line_smooth || 1213bf215546Sopenharmony_ci old_rs->poly_smooth != rs->poly_smooth || 1214bf215546Sopenharmony_ci old_rs->point_smooth != rs->point_smooth || 1215bf215546Sopenharmony_ci old_rs->poly_stipple_enable != rs->poly_stipple_enable || 1216bf215546Sopenharmony_ci 
old_rs->flatshade != rs->flatshade) 1217bf215546Sopenharmony_ci si_update_vrs_flat_shading(sctx); 1218bf215546Sopenharmony_ci} 1219bf215546Sopenharmony_ci 1220bf215546Sopenharmony_cistatic void si_delete_rs_state(struct pipe_context *ctx, void *state) 1221bf215546Sopenharmony_ci{ 1222bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1223bf215546Sopenharmony_ci struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1224bf215546Sopenharmony_ci 1225bf215546Sopenharmony_ci if (sctx->queued.named.rasterizer == state) 1226bf215546Sopenharmony_ci si_bind_rs_state(ctx, sctx->discard_rasterizer_state); 1227bf215546Sopenharmony_ci 1228bf215546Sopenharmony_ci FREE(rs->pm4_poly_offset); 1229bf215546Sopenharmony_ci si_pm4_free_state(sctx, &rs->pm4, SI_STATE_IDX(rasterizer)); 1230bf215546Sopenharmony_ci} 1231bf215546Sopenharmony_ci 1232bf215546Sopenharmony_ci/* 1233bf215546Sopenharmony_ci * inferred state between dsa and stencil ref 1234bf215546Sopenharmony_ci */ 1235bf215546Sopenharmony_cistatic void si_emit_stencil_ref(struct si_context *sctx) 1236bf215546Sopenharmony_ci{ 1237bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 1238bf215546Sopenharmony_ci struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 1239bf215546Sopenharmony_ci struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 1240bf215546Sopenharmony_ci 1241bf215546Sopenharmony_ci radeon_begin(cs); 1242bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028430_DB_STENCILREFMASK, 2); 1243bf215546Sopenharmony_ci radeon_emit(S_028430_STENCILTESTVAL(ref->ref_value[0]) | 1244bf215546Sopenharmony_ci S_028430_STENCILMASK(dsa->valuemask[0]) | 1245bf215546Sopenharmony_ci S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 1246bf215546Sopenharmony_ci S_028430_STENCILOPVAL(1)); 1247bf215546Sopenharmony_ci radeon_emit(S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 1248bf215546Sopenharmony_ci S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 
1249bf215546Sopenharmony_ci S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 1250bf215546Sopenharmony_ci S_028434_STENCILOPVAL_BF(1)); 1251bf215546Sopenharmony_ci radeon_end(); 1252bf215546Sopenharmony_ci} 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_cistatic void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state) 1255bf215546Sopenharmony_ci{ 1256bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1257bf215546Sopenharmony_ci 1258bf215546Sopenharmony_ci if (memcmp(&sctx->stencil_ref.state, &state, sizeof(state)) == 0) 1259bf215546Sopenharmony_ci return; 1260bf215546Sopenharmony_ci 1261bf215546Sopenharmony_ci sctx->stencil_ref.state = state; 1262bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 1263bf215546Sopenharmony_ci} 1264bf215546Sopenharmony_ci 1265bf215546Sopenharmony_ci/* 1266bf215546Sopenharmony_ci * DSA 1267bf215546Sopenharmony_ci */ 1268bf215546Sopenharmony_ci 1269bf215546Sopenharmony_cistatic uint32_t si_translate_stencil_op(int s_op) 1270bf215546Sopenharmony_ci{ 1271bf215546Sopenharmony_ci switch (s_op) { 1272bf215546Sopenharmony_ci case PIPE_STENCIL_OP_KEEP: 1273bf215546Sopenharmony_ci return V_02842C_STENCIL_KEEP; 1274bf215546Sopenharmony_ci case PIPE_STENCIL_OP_ZERO: 1275bf215546Sopenharmony_ci return V_02842C_STENCIL_ZERO; 1276bf215546Sopenharmony_ci case PIPE_STENCIL_OP_REPLACE: 1277bf215546Sopenharmony_ci return V_02842C_STENCIL_REPLACE_TEST; 1278bf215546Sopenharmony_ci case PIPE_STENCIL_OP_INCR: 1279bf215546Sopenharmony_ci return V_02842C_STENCIL_ADD_CLAMP; 1280bf215546Sopenharmony_ci case PIPE_STENCIL_OP_DECR: 1281bf215546Sopenharmony_ci return V_02842C_STENCIL_SUB_CLAMP; 1282bf215546Sopenharmony_ci case PIPE_STENCIL_OP_INCR_WRAP: 1283bf215546Sopenharmony_ci return V_02842C_STENCIL_ADD_WRAP; 1284bf215546Sopenharmony_ci case PIPE_STENCIL_OP_DECR_WRAP: 1285bf215546Sopenharmony_ci return V_02842C_STENCIL_SUB_WRAP; 1286bf215546Sopenharmony_ci case 
PIPE_STENCIL_OP_INVERT: 1287bf215546Sopenharmony_ci return V_02842C_STENCIL_INVERT; 1288bf215546Sopenharmony_ci default: 1289bf215546Sopenharmony_ci PRINT_ERR("Unknown stencil op %d", s_op); 1290bf215546Sopenharmony_ci assert(0); 1291bf215546Sopenharmony_ci break; 1292bf215546Sopenharmony_ci } 1293bf215546Sopenharmony_ci return 0; 1294bf215546Sopenharmony_ci} 1295bf215546Sopenharmony_ci 1296bf215546Sopenharmony_cistatic bool si_order_invariant_stencil_op(enum pipe_stencil_op op) 1297bf215546Sopenharmony_ci{ 1298bf215546Sopenharmony_ci /* REPLACE is normally order invariant, except when the stencil 1299bf215546Sopenharmony_ci * reference value is written by the fragment shader. Tracking this 1300bf215546Sopenharmony_ci * interaction does not seem worth the effort, so be conservative. */ 1301bf215546Sopenharmony_ci return op != PIPE_STENCIL_OP_INCR && op != PIPE_STENCIL_OP_DECR && op != PIPE_STENCIL_OP_REPLACE; 1302bf215546Sopenharmony_ci} 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci/* Compute whether, assuming Z writes are disabled, this stencil state is order 1305bf215546Sopenharmony_ci * invariant in the sense that the set of passing fragments as well as the 1306bf215546Sopenharmony_ci * final stencil buffer result does not depend on the order of fragments. */ 1307bf215546Sopenharmony_cistatic bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state) 1308bf215546Sopenharmony_ci{ 1309bf215546Sopenharmony_ci return !state->enabled || !state->writemask || 1310bf215546Sopenharmony_ci /* The following assumes that Z writes are disabled. 
*/ 1311bf215546Sopenharmony_ci (state->func == PIPE_FUNC_ALWAYS && si_order_invariant_stencil_op(state->zpass_op) && 1312bf215546Sopenharmony_ci si_order_invariant_stencil_op(state->zfail_op)) || 1313bf215546Sopenharmony_ci (state->func == PIPE_FUNC_NEVER && si_order_invariant_stencil_op(state->fail_op)); 1314bf215546Sopenharmony_ci} 1315bf215546Sopenharmony_ci 1316bf215546Sopenharmony_cistatic void *si_create_dsa_state(struct pipe_context *ctx, 1317bf215546Sopenharmony_ci const struct pipe_depth_stencil_alpha_state *state) 1318bf215546Sopenharmony_ci{ 1319bf215546Sopenharmony_ci struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 1320bf215546Sopenharmony_ci struct si_pm4_state *pm4 = &dsa->pm4; 1321bf215546Sopenharmony_ci unsigned db_depth_control; 1322bf215546Sopenharmony_ci uint32_t db_stencil_control = 0; 1323bf215546Sopenharmony_ci 1324bf215546Sopenharmony_ci if (!dsa) { 1325bf215546Sopenharmony_ci return NULL; 1326bf215546Sopenharmony_ci } 1327bf215546Sopenharmony_ci 1328bf215546Sopenharmony_ci dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1329bf215546Sopenharmony_ci dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1330bf215546Sopenharmony_ci dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1331bf215546Sopenharmony_ci dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1332bf215546Sopenharmony_ci 1333bf215546Sopenharmony_ci db_depth_control = 1334bf215546Sopenharmony_ci S_028800_Z_ENABLE(state->depth_enabled) | S_028800_Z_WRITE_ENABLE(state->depth_writemask) | 1335bf215546Sopenharmony_ci S_028800_ZFUNC(state->depth_func) | S_028800_DEPTH_BOUNDS_ENABLE(state->depth_bounds_test); 1336bf215546Sopenharmony_ci 1337bf215546Sopenharmony_ci /* stencil */ 1338bf215546Sopenharmony_ci if (state->stencil[0].enabled) { 1339bf215546Sopenharmony_ci db_depth_control |= S_028800_STENCIL_ENABLE(1); 1340bf215546Sopenharmony_ci db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1341bf215546Sopenharmony_ci 
db_stencil_control |= 1342bf215546Sopenharmony_ci S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1343bf215546Sopenharmony_ci db_stencil_control |= 1344bf215546Sopenharmony_ci S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1345bf215546Sopenharmony_ci db_stencil_control |= 1346bf215546Sopenharmony_ci S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1347bf215546Sopenharmony_ci 1348bf215546Sopenharmony_ci if (state->stencil[1].enabled) { 1349bf215546Sopenharmony_ci db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1350bf215546Sopenharmony_ci db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1351bf215546Sopenharmony_ci db_stencil_control |= 1352bf215546Sopenharmony_ci S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1353bf215546Sopenharmony_ci db_stencil_control |= 1354bf215546Sopenharmony_ci S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1355bf215546Sopenharmony_ci db_stencil_control |= 1356bf215546Sopenharmony_ci S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1357bf215546Sopenharmony_ci } 1358bf215546Sopenharmony_ci } 1359bf215546Sopenharmony_ci 1360bf215546Sopenharmony_ci /* alpha */ 1361bf215546Sopenharmony_ci if (state->alpha_enabled) { 1362bf215546Sopenharmony_ci dsa->alpha_func = state->alpha_func; 1363bf215546Sopenharmony_ci 1364bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4, 1365bf215546Sopenharmony_ci fui(state->alpha_ref_value)); 1366bf215546Sopenharmony_ci } else { 1367bf215546Sopenharmony_ci dsa->alpha_func = PIPE_FUNC_ALWAYS; 1368bf215546Sopenharmony_ci } 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1371bf215546Sopenharmony_ci if (state->stencil[0].enabled) 1372bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, 
db_stencil_control); 1373bf215546Sopenharmony_ci if (state->depth_bounds_test) { 1374bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth_bounds_min)); 1375bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth_bounds_max)); 1376bf215546Sopenharmony_ci } 1377bf215546Sopenharmony_ci 1378bf215546Sopenharmony_ci dsa->depth_enabled = state->depth_enabled; 1379bf215546Sopenharmony_ci dsa->depth_write_enabled = state->depth_enabled && state->depth_writemask; 1380bf215546Sopenharmony_ci dsa->stencil_enabled = state->stencil[0].enabled; 1381bf215546Sopenharmony_ci dsa->stencil_write_enabled = 1382bf215546Sopenharmony_ci (util_writes_stencil(&state->stencil[0]) || util_writes_stencil(&state->stencil[1])); 1383bf215546Sopenharmony_ci dsa->db_can_write = dsa->depth_write_enabled || dsa->stencil_write_enabled; 1384bf215546Sopenharmony_ci 1385bf215546Sopenharmony_ci bool zfunc_is_ordered = 1386bf215546Sopenharmony_ci state->depth_func == PIPE_FUNC_NEVER || state->depth_func == PIPE_FUNC_LESS || 1387bf215546Sopenharmony_ci state->depth_func == PIPE_FUNC_LEQUAL || state->depth_func == PIPE_FUNC_GREATER || 1388bf215546Sopenharmony_ci state->depth_func == PIPE_FUNC_GEQUAL; 1389bf215546Sopenharmony_ci 1390bf215546Sopenharmony_ci bool nozwrite_and_order_invariant_stencil = 1391bf215546Sopenharmony_ci !dsa->db_can_write || 1392bf215546Sopenharmony_ci (!dsa->depth_write_enabled && si_order_invariant_stencil_state(&state->stencil[0]) && 1393bf215546Sopenharmony_ci si_order_invariant_stencil_state(&state->stencil[1])); 1394bf215546Sopenharmony_ci 1395bf215546Sopenharmony_ci dsa->order_invariance[1].zs = 1396bf215546Sopenharmony_ci nozwrite_and_order_invariant_stencil || (!dsa->stencil_write_enabled && zfunc_is_ordered); 1397bf215546Sopenharmony_ci dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered; 1398bf215546Sopenharmony_ci 1399bf215546Sopenharmony_ci dsa->order_invariance[1].pass_set = 
1400bf215546Sopenharmony_ci nozwrite_and_order_invariant_stencil || 1401bf215546Sopenharmony_ci (!dsa->stencil_write_enabled && 1402bf215546Sopenharmony_ci (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER)); 1403bf215546Sopenharmony_ci dsa->order_invariance[0].pass_set = 1404bf215546Sopenharmony_ci !dsa->depth_write_enabled || 1405bf215546Sopenharmony_ci (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER); 1406bf215546Sopenharmony_ci 1407bf215546Sopenharmony_ci return dsa; 1408bf215546Sopenharmony_ci} 1409bf215546Sopenharmony_ci 1410bf215546Sopenharmony_cistatic void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1411bf215546Sopenharmony_ci{ 1412bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1413bf215546Sopenharmony_ci struct si_state_dsa *old_dsa = sctx->queued.named.dsa; 1414bf215546Sopenharmony_ci struct si_state_dsa *dsa = state; 1415bf215546Sopenharmony_ci 1416bf215546Sopenharmony_ci if (!dsa) 1417bf215546Sopenharmony_ci dsa = (struct si_state_dsa *)sctx->noop_dsa; 1418bf215546Sopenharmony_ci 1419bf215546Sopenharmony_ci si_pm4_bind_state(sctx, dsa, dsa); 1420bf215546Sopenharmony_ci 1421bf215546Sopenharmony_ci if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1422bf215546Sopenharmony_ci sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1423bf215546Sopenharmony_ci sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1424bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 1425bf215546Sopenharmony_ci } 1426bf215546Sopenharmony_ci 1427bf215546Sopenharmony_ci if (old_dsa->alpha_func != dsa->alpha_func) { 1428bf215546Sopenharmony_ci si_ps_key_update_dsa(sctx); 1429bf215546Sopenharmony_ci si_update_ps_inputs_read_or_disabled(sctx); 1430bf215546Sopenharmony_ci sctx->do_update_shaders = true; 1431bf215546Sopenharmony_ci } 1432bf215546Sopenharmony_ci 1433bf215546Sopenharmony_ci if (sctx->screen->dpbb_allowed && ((old_dsa->depth_enabled != 
dsa->depth_enabled || 1434bf215546Sopenharmony_ci old_dsa->stencil_enabled != dsa->stencil_enabled || 1435bf215546Sopenharmony_ci old_dsa->db_can_write != dsa->db_can_write))) 1436bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 1437bf215546Sopenharmony_ci 1438bf215546Sopenharmony_ci if (sctx->screen->has_out_of_order_rast && 1439bf215546Sopenharmony_ci (memcmp(old_dsa->order_invariance, dsa->order_invariance, 1440bf215546Sopenharmony_ci sizeof(old_dsa->order_invariance)))) 1441bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1442bf215546Sopenharmony_ci} 1443bf215546Sopenharmony_ci 1444bf215546Sopenharmony_cistatic void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1445bf215546Sopenharmony_ci{ 1446bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1447bf215546Sopenharmony_ci 1448bf215546Sopenharmony_ci if (sctx->queued.named.dsa == state) 1449bf215546Sopenharmony_ci si_bind_dsa_state(ctx, sctx->noop_dsa); 1450bf215546Sopenharmony_ci 1451bf215546Sopenharmony_ci si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(dsa)); 1452bf215546Sopenharmony_ci} 1453bf215546Sopenharmony_ci 1454bf215546Sopenharmony_cistatic void *si_create_db_flush_dsa(struct si_context *sctx) 1455bf215546Sopenharmony_ci{ 1456bf215546Sopenharmony_ci struct pipe_depth_stencil_alpha_state dsa = {}; 1457bf215546Sopenharmony_ci 1458bf215546Sopenharmony_ci return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa); 1459bf215546Sopenharmony_ci} 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_ci/* DB RENDER STATE */ 1462bf215546Sopenharmony_ci 1463bf215546Sopenharmony_cistatic void si_set_active_query_state(struct pipe_context *ctx, bool enable) 1464bf215546Sopenharmony_ci{ 1465bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_ci /* Pipeline stat & streamout queries. 
*/ 1468bf215546Sopenharmony_ci if (enable) { 1469bf215546Sopenharmony_ci sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; 1470bf215546Sopenharmony_ci sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; 1471bf215546Sopenharmony_ci } else { 1472bf215546Sopenharmony_ci sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; 1473bf215546Sopenharmony_ci sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; 1474bf215546Sopenharmony_ci } 1475bf215546Sopenharmony_ci 1476bf215546Sopenharmony_ci /* Occlusion queries. */ 1477bf215546Sopenharmony_ci if (sctx->occlusion_queries_disabled != !enable) { 1478bf215546Sopenharmony_ci sctx->occlusion_queries_disabled = !enable; 1479bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1480bf215546Sopenharmony_ci } 1481bf215546Sopenharmony_ci} 1482bf215546Sopenharmony_ci 1483bf215546Sopenharmony_civoid si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable) 1484bf215546Sopenharmony_ci{ 1485bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1486bf215546Sopenharmony_ci 1487bf215546Sopenharmony_ci bool perfect_enable = sctx->num_perfect_occlusion_queries != 0; 1488bf215546Sopenharmony_ci 1489bf215546Sopenharmony_ci if (perfect_enable != old_perfect_enable) 1490bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1491bf215546Sopenharmony_ci} 1492bf215546Sopenharmony_ci 1493bf215546Sopenharmony_civoid si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st) 1494bf215546Sopenharmony_ci{ 1495bf215546Sopenharmony_ci si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 1496bf215546Sopenharmony_ci} 1497bf215546Sopenharmony_ci 1498bf215546Sopenharmony_civoid si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st) 1499bf215546Sopenharmony_ci{ 1500bf215546Sopenharmony_ci sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, true, &st->saved_const0); 1501bf215546Sopenharmony_ci} 
1502bf215546Sopenharmony_ci 1503bf215546Sopenharmony_cistatic void si_emit_db_render_state(struct si_context *sctx) 1504bf215546Sopenharmony_ci{ 1505bf215546Sopenharmony_ci struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1506bf215546Sopenharmony_ci unsigned db_shader_control, db_render_control, db_count_control; 1507bf215546Sopenharmony_ci 1508bf215546Sopenharmony_ci /* DB_RENDER_CONTROL */ 1509bf215546Sopenharmony_ci if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) { 1510bf215546Sopenharmony_ci db_render_control = S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1511bf215546Sopenharmony_ci S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1512bf215546Sopenharmony_ci S_028000_COPY_CENTROID(1) | S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample); 1513bf215546Sopenharmony_ci } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1514bf215546Sopenharmony_ci db_render_control = S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1515bf215546Sopenharmony_ci S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace); 1516bf215546Sopenharmony_ci } else { 1517bf215546Sopenharmony_ci db_render_control = S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1518bf215546Sopenharmony_ci S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear); 1519bf215546Sopenharmony_ci } 1520bf215546Sopenharmony_ci 1521bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 1522bf215546Sopenharmony_ci unsigned max_allowed_tiles_in_wave = 0; 1523bf215546Sopenharmony_ci 1524bf215546Sopenharmony_ci if (sctx->screen->info.has_dedicated_vram) { 1525bf215546Sopenharmony_ci if (sctx->framebuffer.nr_samples == 8) 1526bf215546Sopenharmony_ci max_allowed_tiles_in_wave = 7; 1527bf215546Sopenharmony_ci else if (sctx->framebuffer.nr_samples == 4) 1528bf215546Sopenharmony_ci max_allowed_tiles_in_wave = 14; 1529bf215546Sopenharmony_ci } else { 1530bf215546Sopenharmony_ci if (sctx->framebuffer.nr_samples == 8) 
1531bf215546Sopenharmony_ci max_allowed_tiles_in_wave = 8; 1532bf215546Sopenharmony_ci } 1533bf215546Sopenharmony_ci 1534bf215546Sopenharmony_ci /* TODO: We may want to disable this workaround for future chips. */ 1535bf215546Sopenharmony_ci if (sctx->framebuffer.nr_samples >= 4) { 1536bf215546Sopenharmony_ci if (max_allowed_tiles_in_wave) 1537bf215546Sopenharmony_ci max_allowed_tiles_in_wave--; 1538bf215546Sopenharmony_ci else 1539bf215546Sopenharmony_ci max_allowed_tiles_in_wave = 15; 1540bf215546Sopenharmony_ci } 1541bf215546Sopenharmony_ci 1542bf215546Sopenharmony_ci db_render_control |= S_028000_OREO_MODE(V_028000_OMODE_O_THEN_B) | 1543bf215546Sopenharmony_ci S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave); 1544bf215546Sopenharmony_ci } 1545bf215546Sopenharmony_ci 1546bf215546Sopenharmony_ci /* DB_COUNT_CONTROL (occlusion queries) */ 1547bf215546Sopenharmony_ci if (sctx->num_occlusion_queries > 0 && !sctx->occlusion_queries_disabled) { 1548bf215546Sopenharmony_ci bool perfect = sctx->num_perfect_occlusion_queries > 0; 1549bf215546Sopenharmony_ci bool gfx10_perfect = sctx->gfx_level >= GFX10 && perfect; 1550bf215546Sopenharmony_ci 1551bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX7) { 1552bf215546Sopenharmony_ci unsigned log_sample_rate = sctx->framebuffer.log_samples; 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_ci db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1555bf215546Sopenharmony_ci S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) | 1556bf215546Sopenharmony_ci S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) | 1557bf215546Sopenharmony_ci S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1); 1558bf215546Sopenharmony_ci } else { 1559bf215546Sopenharmony_ci db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1560bf215546Sopenharmony_ci S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples); 1561bf215546Sopenharmony_ci } 1562bf215546Sopenharmony_ci } else { 
1563bf215546Sopenharmony_ci /* Disable occlusion queries. */ 1564bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX7) { 1565bf215546Sopenharmony_ci db_count_control = 0; 1566bf215546Sopenharmony_ci } else { 1567bf215546Sopenharmony_ci db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); 1568bf215546Sopenharmony_ci } 1569bf215546Sopenharmony_ci } 1570bf215546Sopenharmony_ci 1571bf215546Sopenharmony_ci radeon_begin(&sctx->gfx_cs); 1572bf215546Sopenharmony_ci radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL, 1573bf215546Sopenharmony_ci db_render_control, db_count_control); 1574bf215546Sopenharmony_ci 1575bf215546Sopenharmony_ci /* DB_RENDER_OVERRIDE2 */ 1576bf215546Sopenharmony_ci radeon_opt_set_context_reg( 1577bf215546Sopenharmony_ci sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2, 1578bf215546Sopenharmony_ci S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1579bf215546Sopenharmony_ci S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1580bf215546Sopenharmony_ci S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) | 1581bf215546Sopenharmony_ci S_028010_CENTROID_COMPUTATION_MODE(sctx->gfx_level >= GFX10_3 ? 1 : 0)); 1582bf215546Sopenharmony_ci 1583bf215546Sopenharmony_ci db_shader_control = sctx->ps_db_shader_control; 1584bf215546Sopenharmony_ci 1585bf215546Sopenharmony_ci /* Bug workaround for smoothing (overrasterization) on GFX6. */ 1586bf215546Sopenharmony_ci if (sctx->gfx_level == GFX6 && sctx->smoothing_enabled) { 1587bf215546Sopenharmony_ci db_shader_control &= C_02880C_Z_ORDER; 1588bf215546Sopenharmony_ci db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1589bf215546Sopenharmony_ci } 1590bf215546Sopenharmony_ci 1591bf215546Sopenharmony_ci /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. 
*/ 1592bf215546Sopenharmony_ci if (!rs->multisample_enable) 1593bf215546Sopenharmony_ci db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1594bf215546Sopenharmony_ci 1595bf215546Sopenharmony_ci if (sctx->screen->info.has_rbplus && !sctx->screen->info.rbplus_allowed) 1596bf215546Sopenharmony_ci db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1597bf215546Sopenharmony_ci 1598bf215546Sopenharmony_ci if (sctx->screen->info.has_export_conflict_bug && 1599bf215546Sopenharmony_ci sctx->queued.named.blend->blend_enable_4bit && 1600bf215546Sopenharmony_ci si_get_num_coverage_samples(sctx) == 1) { 1601bf215546Sopenharmony_ci db_shader_control |= S_02880C_OVERRIDE_INTRINSIC_RATE_ENABLE(1) | 1602bf215546Sopenharmony_ci S_02880C_OVERRIDE_INTRINSIC_RATE(2); 1603bf215546Sopenharmony_ci } 1604bf215546Sopenharmony_ci 1605bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL, 1606bf215546Sopenharmony_ci db_shader_control); 1607bf215546Sopenharmony_ci 1608bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10_3) { 1609bf215546Sopenharmony_ci if (sctx->allow_flat_shading) { 1610bf215546Sopenharmony_ci if (sctx->gfx_level == GFX11) { 1611bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, 1612bf215546Sopenharmony_ci SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, 1613bf215546Sopenharmony_ci S_0283D0_VRS_OVERRIDE_RATE_COMBINER_MODE( 1614bf215546Sopenharmony_ci V_0283D0_SC_VRS_COMB_MODE_OVERRIDE) | 1615bf215546Sopenharmony_ci /* If the hw doesn't support VRS 4x4, it will silently 1616bf215546Sopenharmony_ci * use 2x2 instead. 
*/ 1617bf215546Sopenharmony_ci S_0283D0_VRS_RATE(V_0283D0_VRS_SHADING_RATE_4X4)); 1618bf215546Sopenharmony_ci } else { 1619bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL, 1620bf215546Sopenharmony_ci SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, 1621bf215546Sopenharmony_ci S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE( 1622bf215546Sopenharmony_ci V_028064_VRS_COMB_MODE_OVERRIDE) | 1623bf215546Sopenharmony_ci S_028064_VRS_OVERRIDE_RATE_X(1) | 1624bf215546Sopenharmony_ci S_028064_VRS_OVERRIDE_RATE_Y(1)); 1625bf215546Sopenharmony_ci } 1626bf215546Sopenharmony_ci } else { 1627bf215546Sopenharmony_ci /* If the shader is using discard, turn off coarse shading because 1628bf215546Sopenharmony_ci * discard at 2x2 pixel granularity degrades quality too much. 1629bf215546Sopenharmony_ci * 1630bf215546Sopenharmony_ci * MIN allows sample shading but not coarse shading. 1631bf215546Sopenharmony_ci */ 1632bf215546Sopenharmony_ci if (sctx->gfx_level == GFX11) { 1633bf215546Sopenharmony_ci unsigned mode = sctx->screen->options.vrs2x2 && G_02880C_KILL_ENABLE(db_shader_control) ? 1634bf215546Sopenharmony_ci V_0283D0_SC_VRS_COMB_MODE_MIN : V_0283D0_SC_VRS_COMB_MODE_PASSTHRU; 1635bf215546Sopenharmony_ci 1636bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, 1637bf215546Sopenharmony_ci SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, 1638bf215546Sopenharmony_ci S_0283D0_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | 1639bf215546Sopenharmony_ci S_0283D0_VRS_RATE(V_0283D0_VRS_SHADING_RATE_1X1)); 1640bf215546Sopenharmony_ci } else { 1641bf215546Sopenharmony_ci unsigned mode = sctx->screen->options.vrs2x2 && G_02880C_KILL_ENABLE(db_shader_control) ? 
1642bf215546Sopenharmony_ci V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU; 1643bf215546Sopenharmony_ci 1644bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL, 1645bf215546Sopenharmony_ci SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, 1646bf215546Sopenharmony_ci S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | 1647bf215546Sopenharmony_ci S_028064_VRS_OVERRIDE_RATE_X(0) | 1648bf215546Sopenharmony_ci S_028064_VRS_OVERRIDE_RATE_Y(0)); 1649bf215546Sopenharmony_ci } 1650bf215546Sopenharmony_ci } 1651bf215546Sopenharmony_ci } 1652bf215546Sopenharmony_ci radeon_end_update_context_roll(sctx); 1653bf215546Sopenharmony_ci} 1654bf215546Sopenharmony_ci 1655bf215546Sopenharmony_ci/* 1656bf215546Sopenharmony_ci * format translation 1657bf215546Sopenharmony_ci */ 1658bf215546Sopenharmony_ciuint32_t si_translate_colorformat(enum amd_gfx_level gfx_level, 1659bf215546Sopenharmony_ci enum pipe_format format) 1660bf215546Sopenharmony_ci{ 1661bf215546Sopenharmony_ci const struct util_format_description *desc = util_format_description(format); 1662bf215546Sopenharmony_ci 1663bf215546Sopenharmony_ci#define HAS_SIZE(x, y, z, w) \ 1664bf215546Sopenharmony_ci (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1665bf215546Sopenharmony_ci desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1666bf215546Sopenharmony_ci 1667bf215546Sopenharmony_ci if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1668bf215546Sopenharmony_ci return V_028C70_COLOR_10_11_11; 1669bf215546Sopenharmony_ci 1670bf215546Sopenharmony_ci if (gfx_level >= GFX10_3 && 1671bf215546Sopenharmony_ci format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */ 1672bf215546Sopenharmony_ci return V_028C70_COLOR_5_9_9_9; 1673bf215546Sopenharmony_ci 1674bf215546Sopenharmony_ci if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1675bf215546Sopenharmony_ci return V_028C70_COLOR_INVALID; 1676bf215546Sopenharmony_ci 1677bf215546Sopenharmony_ci /* hw cannot support mixed 
formats (except depth/stencil, since 1678bf215546Sopenharmony_ci * stencil is not written to). */ 1679bf215546Sopenharmony_ci if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1680bf215546Sopenharmony_ci return V_028C70_COLOR_INVALID; 1681bf215546Sopenharmony_ci 1682bf215546Sopenharmony_ci int first_non_void = util_format_get_first_non_void_channel(format); 1683bf215546Sopenharmony_ci 1684bf215546Sopenharmony_ci /* Reject SCALED formats because we don't implement them for CB. */ 1685bf215546Sopenharmony_ci if (first_non_void >= 0 && first_non_void <= 3 && 1686bf215546Sopenharmony_ci (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED || 1687bf215546Sopenharmony_ci desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_SIGNED) && 1688bf215546Sopenharmony_ci !desc->channel[first_non_void].normalized && 1689bf215546Sopenharmony_ci !desc->channel[first_non_void].pure_integer) 1690bf215546Sopenharmony_ci return V_028C70_COLOR_INVALID; 1691bf215546Sopenharmony_ci 1692bf215546Sopenharmony_ci switch (desc->nr_channels) { 1693bf215546Sopenharmony_ci case 1: 1694bf215546Sopenharmony_ci switch (desc->channel[0].size) { 1695bf215546Sopenharmony_ci case 8: 1696bf215546Sopenharmony_ci return V_028C70_COLOR_8; 1697bf215546Sopenharmony_ci case 16: 1698bf215546Sopenharmony_ci return V_028C70_COLOR_16; 1699bf215546Sopenharmony_ci case 32: 1700bf215546Sopenharmony_ci return V_028C70_COLOR_32; 1701bf215546Sopenharmony_ci } 1702bf215546Sopenharmony_ci break; 1703bf215546Sopenharmony_ci case 2: 1704bf215546Sopenharmony_ci if (desc->channel[0].size == desc->channel[1].size) { 1705bf215546Sopenharmony_ci switch (desc->channel[0].size) { 1706bf215546Sopenharmony_ci case 8: 1707bf215546Sopenharmony_ci return V_028C70_COLOR_8_8; 1708bf215546Sopenharmony_ci case 16: 1709bf215546Sopenharmony_ci return V_028C70_COLOR_16_16; 1710bf215546Sopenharmony_ci case 32: 1711bf215546Sopenharmony_ci return V_028C70_COLOR_32_32; 1712bf215546Sopenharmony_ci } 
1713bf215546Sopenharmony_ci } else if (HAS_SIZE(8, 24, 0, 0)) { 1714bf215546Sopenharmony_ci return V_028C70_COLOR_24_8; 1715bf215546Sopenharmony_ci } else if (HAS_SIZE(24, 8, 0, 0)) { 1716bf215546Sopenharmony_ci return V_028C70_COLOR_8_24; 1717bf215546Sopenharmony_ci } 1718bf215546Sopenharmony_ci break; 1719bf215546Sopenharmony_ci case 3: 1720bf215546Sopenharmony_ci if (HAS_SIZE(5, 6, 5, 0)) { 1721bf215546Sopenharmony_ci return V_028C70_COLOR_5_6_5; 1722bf215546Sopenharmony_ci } else if (HAS_SIZE(32, 8, 24, 0)) { 1723bf215546Sopenharmony_ci return V_028C70_COLOR_X24_8_32_FLOAT; 1724bf215546Sopenharmony_ci } 1725bf215546Sopenharmony_ci break; 1726bf215546Sopenharmony_ci case 4: 1727bf215546Sopenharmony_ci if (desc->channel[0].size == desc->channel[1].size && 1728bf215546Sopenharmony_ci desc->channel[0].size == desc->channel[2].size && 1729bf215546Sopenharmony_ci desc->channel[0].size == desc->channel[3].size) { 1730bf215546Sopenharmony_ci switch (desc->channel[0].size) { 1731bf215546Sopenharmony_ci case 4: 1732bf215546Sopenharmony_ci return V_028C70_COLOR_4_4_4_4; 1733bf215546Sopenharmony_ci case 8: 1734bf215546Sopenharmony_ci return V_028C70_COLOR_8_8_8_8; 1735bf215546Sopenharmony_ci case 16: 1736bf215546Sopenharmony_ci return V_028C70_COLOR_16_16_16_16; 1737bf215546Sopenharmony_ci case 32: 1738bf215546Sopenharmony_ci return V_028C70_COLOR_32_32_32_32; 1739bf215546Sopenharmony_ci } 1740bf215546Sopenharmony_ci } else if (HAS_SIZE(5, 5, 5, 1)) { 1741bf215546Sopenharmony_ci return V_028C70_COLOR_1_5_5_5; 1742bf215546Sopenharmony_ci } else if (HAS_SIZE(1, 5, 5, 5)) { 1743bf215546Sopenharmony_ci return V_028C70_COLOR_5_5_5_1; 1744bf215546Sopenharmony_ci } else if (HAS_SIZE(10, 10, 10, 2)) { 1745bf215546Sopenharmony_ci return V_028C70_COLOR_2_10_10_10; 1746bf215546Sopenharmony_ci } else if (HAS_SIZE(2, 10, 10, 10)) { 1747bf215546Sopenharmony_ci return V_028C70_COLOR_10_10_10_2; 1748bf215546Sopenharmony_ci } 1749bf215546Sopenharmony_ci break; 1750bf215546Sopenharmony_ci } 
1751bf215546Sopenharmony_ci return V_028C70_COLOR_INVALID; 1752bf215546Sopenharmony_ci} 1753bf215546Sopenharmony_ci 1754bf215546Sopenharmony_cistatic uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1755bf215546Sopenharmony_ci{ 1756bf215546Sopenharmony_ci if (SI_BIG_ENDIAN) { 1757bf215546Sopenharmony_ci switch (colorformat) { 1758bf215546Sopenharmony_ci /* 8-bit buffers. */ 1759bf215546Sopenharmony_ci case V_028C70_COLOR_8: 1760bf215546Sopenharmony_ci return V_028C70_ENDIAN_NONE; 1761bf215546Sopenharmony_ci 1762bf215546Sopenharmony_ci /* 16-bit buffers. */ 1763bf215546Sopenharmony_ci case V_028C70_COLOR_5_6_5: 1764bf215546Sopenharmony_ci case V_028C70_COLOR_1_5_5_5: 1765bf215546Sopenharmony_ci case V_028C70_COLOR_4_4_4_4: 1766bf215546Sopenharmony_ci case V_028C70_COLOR_16: 1767bf215546Sopenharmony_ci case V_028C70_COLOR_8_8: 1768bf215546Sopenharmony_ci return V_028C70_ENDIAN_8IN16; 1769bf215546Sopenharmony_ci 1770bf215546Sopenharmony_ci /* 32-bit buffers. */ 1771bf215546Sopenharmony_ci case V_028C70_COLOR_8_8_8_8: 1772bf215546Sopenharmony_ci case V_028C70_COLOR_2_10_10_10: 1773bf215546Sopenharmony_ci case V_028C70_COLOR_10_10_10_2: 1774bf215546Sopenharmony_ci case V_028C70_COLOR_8_24: 1775bf215546Sopenharmony_ci case V_028C70_COLOR_24_8: 1776bf215546Sopenharmony_ci case V_028C70_COLOR_16_16: 1777bf215546Sopenharmony_ci return V_028C70_ENDIAN_8IN32; 1778bf215546Sopenharmony_ci 1779bf215546Sopenharmony_ci /* 64-bit buffers. */ 1780bf215546Sopenharmony_ci case V_028C70_COLOR_16_16_16_16: 1781bf215546Sopenharmony_ci return V_028C70_ENDIAN_8IN16; 1782bf215546Sopenharmony_ci 1783bf215546Sopenharmony_ci case V_028C70_COLOR_32_32: 1784bf215546Sopenharmony_ci return V_028C70_ENDIAN_8IN32; 1785bf215546Sopenharmony_ci 1786bf215546Sopenharmony_ci /* 128-bit buffers. 
*/ 1787bf215546Sopenharmony_ci case V_028C70_COLOR_32_32_32_32: 1788bf215546Sopenharmony_ci return V_028C70_ENDIAN_8IN32; 1789bf215546Sopenharmony_ci default: 1790bf215546Sopenharmony_ci return V_028C70_ENDIAN_NONE; /* Unsupported. */ 1791bf215546Sopenharmony_ci } 1792bf215546Sopenharmony_ci } else { 1793bf215546Sopenharmony_ci return V_028C70_ENDIAN_NONE; 1794bf215546Sopenharmony_ci } 1795bf215546Sopenharmony_ci} 1796bf215546Sopenharmony_ci 1797bf215546Sopenharmony_cistatic uint32_t si_translate_dbformat(enum pipe_format format) 1798bf215546Sopenharmony_ci{ 1799bf215546Sopenharmony_ci switch (format) { 1800bf215546Sopenharmony_ci case PIPE_FORMAT_Z16_UNORM: 1801bf215546Sopenharmony_ci return V_028040_Z_16; 1802bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1803bf215546Sopenharmony_ci case PIPE_FORMAT_X8Z24_UNORM: 1804bf215546Sopenharmony_ci case PIPE_FORMAT_Z24X8_UNORM: 1805bf215546Sopenharmony_ci case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1806bf215546Sopenharmony_ci return V_028040_Z_24; /* deprecated on AMD GCN */ 1807bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT: 1808bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1809bf215546Sopenharmony_ci return V_028040_Z_32_FLOAT; 1810bf215546Sopenharmony_ci default: 1811bf215546Sopenharmony_ci return V_028040_Z_INVALID; 1812bf215546Sopenharmony_ci } 1813bf215546Sopenharmony_ci} 1814bf215546Sopenharmony_ci 1815bf215546Sopenharmony_ci/* 1816bf215546Sopenharmony_ci * Texture translation 1817bf215546Sopenharmony_ci */ 1818bf215546Sopenharmony_ci 1819bf215546Sopenharmony_cistatic uint32_t si_translate_texformat(struct pipe_screen *screen, enum pipe_format format, 1820bf215546Sopenharmony_ci const struct util_format_description *desc, 1821bf215546Sopenharmony_ci int first_non_void) 1822bf215546Sopenharmony_ci{ 1823bf215546Sopenharmony_ci struct si_screen *sscreen = (struct si_screen *)screen; 1824bf215546Sopenharmony_ci bool uniform = true; 1825bf215546Sopenharmony_ci int i; 1826bf215546Sopenharmony_ci 
1827bf215546Sopenharmony_ci assert(sscreen->info.gfx_level <= GFX9); 1828bf215546Sopenharmony_ci 1829bf215546Sopenharmony_ci /* Colorspace (return non-RGB formats directly). */ 1830bf215546Sopenharmony_ci switch (desc->colorspace) { 1831bf215546Sopenharmony_ci /* Depth stencil formats */ 1832bf215546Sopenharmony_ci case UTIL_FORMAT_COLORSPACE_ZS: 1833bf215546Sopenharmony_ci switch (format) { 1834bf215546Sopenharmony_ci case PIPE_FORMAT_Z16_UNORM: 1835bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_16; 1836bf215546Sopenharmony_ci case PIPE_FORMAT_X24S8_UINT: 1837bf215546Sopenharmony_ci case PIPE_FORMAT_S8X24_UINT: 1838bf215546Sopenharmony_ci /* 1839bf215546Sopenharmony_ci * Implemented as an 8_8_8_8 data format to fix texture 1840bf215546Sopenharmony_ci * gathers in stencil sampling. This affects at least 1841bf215546Sopenharmony_ci * GL45-CTS.texture_cube_map_array.sampling on GFX8. 1842bf215546Sopenharmony_ci */ 1843bf215546Sopenharmony_ci if (sscreen->info.gfx_level <= GFX8) 1844bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 1845bf215546Sopenharmony_ci 1846bf215546Sopenharmony_ci if (format == PIPE_FORMAT_X24S8_UINT) 1847bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_8_24; 1848bf215546Sopenharmony_ci else 1849bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_24_8; 1850bf215546Sopenharmony_ci case PIPE_FORMAT_Z24X8_UNORM: 1851bf215546Sopenharmony_ci case PIPE_FORMAT_Z24_UNORM_S8_UINT: 1852bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_8_24; 1853bf215546Sopenharmony_ci case PIPE_FORMAT_X8Z24_UNORM: 1854bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT_Z24_UNORM: 1855bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_24_8; 1856bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT: 1857bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_8; 1858bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT: 1859bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_32; 1860bf215546Sopenharmony_ci case 
PIPE_FORMAT_X32_S8X24_UINT: 1861bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1862bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_X24_8_32; 1863bf215546Sopenharmony_ci default: 1864bf215546Sopenharmony_ci goto out_unknown; 1865bf215546Sopenharmony_ci } 1866bf215546Sopenharmony_ci 1867bf215546Sopenharmony_ci case UTIL_FORMAT_COLORSPACE_YUV: 1868bf215546Sopenharmony_ci goto out_unknown; /* TODO */ 1869bf215546Sopenharmony_ci 1870bf215546Sopenharmony_ci case UTIL_FORMAT_COLORSPACE_SRGB: 1871bf215546Sopenharmony_ci if (desc->nr_channels != 4 && desc->nr_channels != 1) 1872bf215546Sopenharmony_ci goto out_unknown; 1873bf215546Sopenharmony_ci break; 1874bf215546Sopenharmony_ci 1875bf215546Sopenharmony_ci default: 1876bf215546Sopenharmony_ci break; 1877bf215546Sopenharmony_ci } 1878bf215546Sopenharmony_ci 1879bf215546Sopenharmony_ci if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) { 1880bf215546Sopenharmony_ci switch (format) { 1881bf215546Sopenharmony_ci case PIPE_FORMAT_RGTC1_SNORM: 1882bf215546Sopenharmony_ci case PIPE_FORMAT_LATC1_SNORM: 1883bf215546Sopenharmony_ci case PIPE_FORMAT_RGTC1_UNORM: 1884bf215546Sopenharmony_ci case PIPE_FORMAT_LATC1_UNORM: 1885bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_BC4; 1886bf215546Sopenharmony_ci case PIPE_FORMAT_RGTC2_SNORM: 1887bf215546Sopenharmony_ci case PIPE_FORMAT_LATC2_SNORM: 1888bf215546Sopenharmony_ci case PIPE_FORMAT_RGTC2_UNORM: 1889bf215546Sopenharmony_ci case PIPE_FORMAT_LATC2_UNORM: 1890bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_BC5; 1891bf215546Sopenharmony_ci default: 1892bf215546Sopenharmony_ci goto out_unknown; 1893bf215546Sopenharmony_ci } 1894bf215546Sopenharmony_ci } 1895bf215546Sopenharmony_ci 1896bf215546Sopenharmony_ci if (desc->layout == UTIL_FORMAT_LAYOUT_ETC && 1897bf215546Sopenharmony_ci (sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA10 || 1898bf215546Sopenharmony_ci sscreen->info.family == CHIP_RAVEN || sscreen->info.family == 
CHIP_RAVEN2)) { 1899bf215546Sopenharmony_ci switch (format) { 1900bf215546Sopenharmony_ci case PIPE_FORMAT_ETC1_RGB8: 1901bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_RGB8: 1902bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_SRGB8: 1903bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_ETC2_RGB; 1904bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_RGB8A1: 1905bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_SRGB8A1: 1906bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1; 1907bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_RGBA8: 1908bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_SRGBA8: 1909bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA; 1910bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_R11_UNORM: 1911bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_R11_SNORM: 1912bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_ETC2_R; 1913bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_RG11_UNORM: 1914bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_RG11_SNORM: 1915bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_ETC2_RG; 1916bf215546Sopenharmony_ci default: 1917bf215546Sopenharmony_ci goto out_unknown; 1918bf215546Sopenharmony_ci } 1919bf215546Sopenharmony_ci } 1920bf215546Sopenharmony_ci 1921bf215546Sopenharmony_ci if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) { 1922bf215546Sopenharmony_ci switch (format) { 1923bf215546Sopenharmony_ci case PIPE_FORMAT_BPTC_RGBA_UNORM: 1924bf215546Sopenharmony_ci case PIPE_FORMAT_BPTC_SRGBA: 1925bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_BC7; 1926bf215546Sopenharmony_ci case PIPE_FORMAT_BPTC_RGB_FLOAT: 1927bf215546Sopenharmony_ci case PIPE_FORMAT_BPTC_RGB_UFLOAT: 1928bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_BC6; 1929bf215546Sopenharmony_ci default: 1930bf215546Sopenharmony_ci goto out_unknown; 1931bf215546Sopenharmony_ci } 1932bf215546Sopenharmony_ci } 1933bf215546Sopenharmony_ci 1934bf215546Sopenharmony_ci if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 
1935bf215546Sopenharmony_ci switch (format) { 1936bf215546Sopenharmony_ci case PIPE_FORMAT_R8G8_B8G8_UNORM: 1937bf215546Sopenharmony_ci case PIPE_FORMAT_G8R8_B8R8_UNORM: 1938bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_GB_GR; 1939bf215546Sopenharmony_ci case PIPE_FORMAT_G8R8_G8B8_UNORM: 1940bf215546Sopenharmony_ci case PIPE_FORMAT_R8G8_R8B8_UNORM: 1941bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_BG_RG; 1942bf215546Sopenharmony_ci default: 1943bf215546Sopenharmony_ci goto out_unknown; 1944bf215546Sopenharmony_ci } 1945bf215546Sopenharmony_ci } 1946bf215546Sopenharmony_ci 1947bf215546Sopenharmony_ci if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { 1948bf215546Sopenharmony_ci switch (format) { 1949bf215546Sopenharmony_ci case PIPE_FORMAT_DXT1_RGB: 1950bf215546Sopenharmony_ci case PIPE_FORMAT_DXT1_RGBA: 1951bf215546Sopenharmony_ci case PIPE_FORMAT_DXT1_SRGB: 1952bf215546Sopenharmony_ci case PIPE_FORMAT_DXT1_SRGBA: 1953bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_BC1; 1954bf215546Sopenharmony_ci case PIPE_FORMAT_DXT3_RGBA: 1955bf215546Sopenharmony_ci case PIPE_FORMAT_DXT3_SRGBA: 1956bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_BC2; 1957bf215546Sopenharmony_ci case PIPE_FORMAT_DXT5_RGBA: 1958bf215546Sopenharmony_ci case PIPE_FORMAT_DXT5_SRGBA: 1959bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_BC3; 1960bf215546Sopenharmony_ci default: 1961bf215546Sopenharmony_ci goto out_unknown; 1962bf215546Sopenharmony_ci } 1963bf215546Sopenharmony_ci } 1964bf215546Sopenharmony_ci 1965bf215546Sopenharmony_ci if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) { 1966bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_5_9_9_9; 1967bf215546Sopenharmony_ci } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) { 1968bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_10_11_11; 1969bf215546Sopenharmony_ci } 1970bf215546Sopenharmony_ci 1971bf215546Sopenharmony_ci /* Other "OTHER" layouts are unsupported. 
*/ 1972bf215546Sopenharmony_ci if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER) 1973bf215546Sopenharmony_ci goto out_unknown; 1974bf215546Sopenharmony_ci 1975bf215546Sopenharmony_ci /* hw cannot support mixed formats (except depth/stencil, since only 1976bf215546Sopenharmony_ci * depth is read).*/ 1977bf215546Sopenharmony_ci if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1978bf215546Sopenharmony_ci goto out_unknown; 1979bf215546Sopenharmony_ci 1980bf215546Sopenharmony_ci if (first_non_void < 0 || first_non_void > 3) 1981bf215546Sopenharmony_ci goto out_unknown; 1982bf215546Sopenharmony_ci 1983bf215546Sopenharmony_ci /* Reject SCALED formats because we don't implement them for CB and do the same for texturing. */ 1984bf215546Sopenharmony_ci if ((desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED || 1985bf215546Sopenharmony_ci desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_SIGNED) && 1986bf215546Sopenharmony_ci !desc->channel[first_non_void].normalized && 1987bf215546Sopenharmony_ci !desc->channel[first_non_void].pure_integer) 1988bf215546Sopenharmony_ci goto out_unknown; 1989bf215546Sopenharmony_ci 1990bf215546Sopenharmony_ci /* Reject unsupported 32_*NORM and FIXED formats. */ 1991bf215546Sopenharmony_ci if (desc->channel[first_non_void].size == 32 && 1992bf215546Sopenharmony_ci (desc->channel[first_non_void].normalized || 1993bf215546Sopenharmony_ci desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_FIXED)) 1994bf215546Sopenharmony_ci goto out_unknown; 1995bf215546Sopenharmony_ci 1996bf215546Sopenharmony_ci /* This format fails on Gfx8/Carrizo´. */ 1997bf215546Sopenharmony_ci if (format == PIPE_FORMAT_A8R8_UNORM) 1998bf215546Sopenharmony_ci goto out_unknown; 1999bf215546Sopenharmony_ci 2000bf215546Sopenharmony_ci /* See whether the components are of the same size. 
*/ 2001bf215546Sopenharmony_ci for (i = 1; i < desc->nr_channels; i++) { 2002bf215546Sopenharmony_ci uniform = uniform && desc->channel[0].size == desc->channel[i].size; 2003bf215546Sopenharmony_ci } 2004bf215546Sopenharmony_ci 2005bf215546Sopenharmony_ci /* Non-uniform formats. */ 2006bf215546Sopenharmony_ci if (!uniform) { 2007bf215546Sopenharmony_ci switch (desc->nr_channels) { 2008bf215546Sopenharmony_ci case 3: 2009bf215546Sopenharmony_ci if (desc->channel[0].size == 5 && desc->channel[1].size == 6 && 2010bf215546Sopenharmony_ci desc->channel[2].size == 5) { 2011bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_5_6_5; 2012bf215546Sopenharmony_ci } 2013bf215546Sopenharmony_ci goto out_unknown; 2014bf215546Sopenharmony_ci case 4: 2015bf215546Sopenharmony_ci /* 5551 and 1555 UINT formats fail on Gfx8/Carrizo´. */ 2016bf215546Sopenharmony_ci if (desc->channel[1].size == 5 && 2017bf215546Sopenharmony_ci desc->channel[2].size == 5 && 2018bf215546Sopenharmony_ci desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED && 2019bf215546Sopenharmony_ci desc->channel[first_non_void].pure_integer) 2020bf215546Sopenharmony_ci goto out_unknown; 2021bf215546Sopenharmony_ci 2022bf215546Sopenharmony_ci if (desc->channel[0].size == 5 && desc->channel[1].size == 5 && 2023bf215546Sopenharmony_ci desc->channel[2].size == 5 && desc->channel[3].size == 1) { 2024bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_1_5_5_5; 2025bf215546Sopenharmony_ci } 2026bf215546Sopenharmony_ci if (desc->channel[0].size == 1 && desc->channel[1].size == 5 && 2027bf215546Sopenharmony_ci desc->channel[2].size == 5 && desc->channel[3].size == 5) { 2028bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_5_5_5_1; 2029bf215546Sopenharmony_ci } 2030bf215546Sopenharmony_ci if (desc->channel[0].size == 10 && desc->channel[1].size == 10 && 2031bf215546Sopenharmony_ci desc->channel[2].size == 10 && desc->channel[3].size == 2) { 2032bf215546Sopenharmony_ci return 
V_008F14_IMG_DATA_FORMAT_2_10_10_10; 2033bf215546Sopenharmony_ci } 2034bf215546Sopenharmony_ci goto out_unknown; 2035bf215546Sopenharmony_ci } 2036bf215546Sopenharmony_ci goto out_unknown; 2037bf215546Sopenharmony_ci } 2038bf215546Sopenharmony_ci 2039bf215546Sopenharmony_ci /* uniform formats */ 2040bf215546Sopenharmony_ci switch (desc->channel[first_non_void].size) { 2041bf215546Sopenharmony_ci case 4: 2042bf215546Sopenharmony_ci switch (desc->nr_channels) { 2043bf215546Sopenharmony_ci case 4: 2044bf215546Sopenharmony_ci /* 4444 UINT formats fail on Gfx8/Carrizo´. */ 2045bf215546Sopenharmony_ci if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED && 2046bf215546Sopenharmony_ci desc->channel[first_non_void].pure_integer) 2047bf215546Sopenharmony_ci goto out_unknown; 2048bf215546Sopenharmony_ci 2049bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 2050bf215546Sopenharmony_ci } 2051bf215546Sopenharmony_ci break; 2052bf215546Sopenharmony_ci case 8: 2053bf215546Sopenharmony_ci switch (desc->nr_channels) { 2054bf215546Sopenharmony_ci case 1: 2055bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_8; 2056bf215546Sopenharmony_ci case 2: 2057bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_8_8; 2058bf215546Sopenharmony_ci case 4: 2059bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 2060bf215546Sopenharmony_ci } 2061bf215546Sopenharmony_ci break; 2062bf215546Sopenharmony_ci case 16: 2063bf215546Sopenharmony_ci switch (desc->nr_channels) { 2064bf215546Sopenharmony_ci case 1: 2065bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_16; 2066bf215546Sopenharmony_ci case 2: 2067bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_16_16; 2068bf215546Sopenharmony_ci case 4: 2069bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 2070bf215546Sopenharmony_ci } 2071bf215546Sopenharmony_ci break; 2072bf215546Sopenharmony_ci case 32: 2073bf215546Sopenharmony_ci switch (desc->nr_channels) { 
2074bf215546Sopenharmony_ci case 1: 2075bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_32; 2076bf215546Sopenharmony_ci case 2: 2077bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_32_32; 2078bf215546Sopenharmony_ci#if 0 /* Not supported for render targets */ 2079bf215546Sopenharmony_ci case 3: 2080bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_32_32_32; 2081bf215546Sopenharmony_ci#endif 2082bf215546Sopenharmony_ci case 4: 2083bf215546Sopenharmony_ci return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 2084bf215546Sopenharmony_ci } 2085bf215546Sopenharmony_ci } 2086bf215546Sopenharmony_ci 2087bf215546Sopenharmony_ciout_unknown: 2088bf215546Sopenharmony_ci return ~0; 2089bf215546Sopenharmony_ci} 2090bf215546Sopenharmony_ci 2091bf215546Sopenharmony_cistatic unsigned is_wrap_mode_legal(struct si_screen *screen, unsigned wrap) 2092bf215546Sopenharmony_ci{ 2093bf215546Sopenharmony_ci if (!screen->info.has_3d_cube_border_color_mipmap) { 2094bf215546Sopenharmony_ci switch (wrap) { 2095bf215546Sopenharmony_ci case PIPE_TEX_WRAP_CLAMP: 2096bf215546Sopenharmony_ci case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 2097bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_CLAMP: 2098bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 2099bf215546Sopenharmony_ci return false; 2100bf215546Sopenharmony_ci } 2101bf215546Sopenharmony_ci } 2102bf215546Sopenharmony_ci return true; 2103bf215546Sopenharmony_ci} 2104bf215546Sopenharmony_ci 2105bf215546Sopenharmony_cistatic unsigned si_tex_wrap(unsigned wrap) 2106bf215546Sopenharmony_ci{ 2107bf215546Sopenharmony_ci switch (wrap) { 2108bf215546Sopenharmony_ci default: 2109bf215546Sopenharmony_ci case PIPE_TEX_WRAP_REPEAT: 2110bf215546Sopenharmony_ci return V_008F30_SQ_TEX_WRAP; 2111bf215546Sopenharmony_ci case PIPE_TEX_WRAP_CLAMP: 2112bf215546Sopenharmony_ci return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 2113bf215546Sopenharmony_ci case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 2114bf215546Sopenharmony_ci return 
V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 2115bf215546Sopenharmony_ci case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 2116bf215546Sopenharmony_ci return V_008F30_SQ_TEX_CLAMP_BORDER; 2117bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_REPEAT: 2118bf215546Sopenharmony_ci return V_008F30_SQ_TEX_MIRROR; 2119bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_CLAMP: 2120bf215546Sopenharmony_ci return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 2121bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 2122bf215546Sopenharmony_ci return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 2123bf215546Sopenharmony_ci case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 2124bf215546Sopenharmony_ci return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 2125bf215546Sopenharmony_ci } 2126bf215546Sopenharmony_ci} 2127bf215546Sopenharmony_ci 2128bf215546Sopenharmony_cistatic unsigned si_tex_mipfilter(unsigned filter) 2129bf215546Sopenharmony_ci{ 2130bf215546Sopenharmony_ci switch (filter) { 2131bf215546Sopenharmony_ci case PIPE_TEX_MIPFILTER_NEAREST: 2132bf215546Sopenharmony_ci return V_008F38_SQ_TEX_Z_FILTER_POINT; 2133bf215546Sopenharmony_ci case PIPE_TEX_MIPFILTER_LINEAR: 2134bf215546Sopenharmony_ci return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 2135bf215546Sopenharmony_ci default: 2136bf215546Sopenharmony_ci case PIPE_TEX_MIPFILTER_NONE: 2137bf215546Sopenharmony_ci return V_008F38_SQ_TEX_Z_FILTER_NONE; 2138bf215546Sopenharmony_ci } 2139bf215546Sopenharmony_ci} 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_cistatic unsigned si_tex_compare(unsigned compare) 2142bf215546Sopenharmony_ci{ 2143bf215546Sopenharmony_ci switch (compare) { 2144bf215546Sopenharmony_ci default: 2145bf215546Sopenharmony_ci case PIPE_FUNC_NEVER: 2146bf215546Sopenharmony_ci return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 2147bf215546Sopenharmony_ci case PIPE_FUNC_LESS: 2148bf215546Sopenharmony_ci return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 2149bf215546Sopenharmony_ci case PIPE_FUNC_EQUAL: 2150bf215546Sopenharmony_ci return 
V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 2151bf215546Sopenharmony_ci case PIPE_FUNC_LEQUAL: 2152bf215546Sopenharmony_ci return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 2153bf215546Sopenharmony_ci case PIPE_FUNC_GREATER: 2154bf215546Sopenharmony_ci return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 2155bf215546Sopenharmony_ci case PIPE_FUNC_NOTEQUAL: 2156bf215546Sopenharmony_ci return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 2157bf215546Sopenharmony_ci case PIPE_FUNC_GEQUAL: 2158bf215546Sopenharmony_ci return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 2159bf215546Sopenharmony_ci case PIPE_FUNC_ALWAYS: 2160bf215546Sopenharmony_ci return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 2161bf215546Sopenharmony_ci } 2162bf215546Sopenharmony_ci} 2163bf215546Sopenharmony_ci 2164bf215546Sopenharmony_cistatic unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex, unsigned view_target, 2165bf215546Sopenharmony_ci unsigned nr_samples) 2166bf215546Sopenharmony_ci{ 2167bf215546Sopenharmony_ci unsigned res_target = tex->buffer.b.b.target; 2168bf215546Sopenharmony_ci 2169bf215546Sopenharmony_ci if (view_target == PIPE_TEXTURE_CUBE || view_target == PIPE_TEXTURE_CUBE_ARRAY) 2170bf215546Sopenharmony_ci res_target = view_target; 2171bf215546Sopenharmony_ci /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 2172bf215546Sopenharmony_ci else if (res_target == PIPE_TEXTURE_CUBE || res_target == PIPE_TEXTURE_CUBE_ARRAY) 2173bf215546Sopenharmony_ci res_target = PIPE_TEXTURE_2D_ARRAY; 2174bf215546Sopenharmony_ci 2175bf215546Sopenharmony_ci /* GFX9 allocates 1D textures as 2D. 
*/ 2176bf215546Sopenharmony_ci if ((res_target == PIPE_TEXTURE_1D || res_target == PIPE_TEXTURE_1D_ARRAY) && 2177bf215546Sopenharmony_ci sscreen->info.gfx_level == GFX9 && 2178bf215546Sopenharmony_ci tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) { 2179bf215546Sopenharmony_ci if (res_target == PIPE_TEXTURE_1D) 2180bf215546Sopenharmony_ci res_target = PIPE_TEXTURE_2D; 2181bf215546Sopenharmony_ci else 2182bf215546Sopenharmony_ci res_target = PIPE_TEXTURE_2D_ARRAY; 2183bf215546Sopenharmony_ci } 2184bf215546Sopenharmony_ci 2185bf215546Sopenharmony_ci switch (res_target) { 2186bf215546Sopenharmony_ci default: 2187bf215546Sopenharmony_ci case PIPE_TEXTURE_1D: 2188bf215546Sopenharmony_ci return V_008F1C_SQ_RSRC_IMG_1D; 2189bf215546Sopenharmony_ci case PIPE_TEXTURE_1D_ARRAY: 2190bf215546Sopenharmony_ci return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 2191bf215546Sopenharmony_ci case PIPE_TEXTURE_2D: 2192bf215546Sopenharmony_ci case PIPE_TEXTURE_RECT: 2193bf215546Sopenharmony_ci return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : V_008F1C_SQ_RSRC_IMG_2D; 2194bf215546Sopenharmony_ci case PIPE_TEXTURE_2D_ARRAY: 2195bf215546Sopenharmony_ci return nr_samples > 1 ? 
V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 2196bf215546Sopenharmony_ci case PIPE_TEXTURE_3D: 2197bf215546Sopenharmony_ci return V_008F1C_SQ_RSRC_IMG_3D; 2198bf215546Sopenharmony_ci case PIPE_TEXTURE_CUBE: 2199bf215546Sopenharmony_ci case PIPE_TEXTURE_CUBE_ARRAY: 2200bf215546Sopenharmony_ci return V_008F1C_SQ_RSRC_IMG_CUBE; 2201bf215546Sopenharmony_ci } 2202bf215546Sopenharmony_ci} 2203bf215546Sopenharmony_ci 2204bf215546Sopenharmony_ci/* 2205bf215546Sopenharmony_ci * Format support testing 2206bf215546Sopenharmony_ci */ 2207bf215546Sopenharmony_ci 2208bf215546Sopenharmony_cistatic bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 2209bf215546Sopenharmony_ci{ 2210bf215546Sopenharmony_ci struct si_screen *sscreen = (struct si_screen *)screen; 2211bf215546Sopenharmony_ci const struct util_format_description *desc = util_format_description(format); 2212bf215546Sopenharmony_ci 2213bf215546Sopenharmony_ci /* Samplers don't support 64 bits per channel. 
*/ 2214bf215546Sopenharmony_ci if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 2215bf215546Sopenharmony_ci desc->channel[0].size == 64) 2216bf215546Sopenharmony_ci return false; 2217bf215546Sopenharmony_ci 2218bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX10) { 2219bf215546Sopenharmony_ci const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&sscreen->info)[format]; 2220bf215546Sopenharmony_ci if (!fmt->img_format || fmt->buffers_only) 2221bf215546Sopenharmony_ci return false; 2222bf215546Sopenharmony_ci return true; 2223bf215546Sopenharmony_ci } 2224bf215546Sopenharmony_ci 2225bf215546Sopenharmony_ci return si_translate_texformat(screen, format, desc, 2226bf215546Sopenharmony_ci util_format_get_first_non_void_channel(format)) != ~0U; 2227bf215546Sopenharmony_ci} 2228bf215546Sopenharmony_ci 2229bf215546Sopenharmony_cistatic uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 2230bf215546Sopenharmony_ci const struct util_format_description *desc, 2231bf215546Sopenharmony_ci int first_non_void) 2232bf215546Sopenharmony_ci{ 2233bf215546Sopenharmony_ci int i; 2234bf215546Sopenharmony_ci 2235bf215546Sopenharmony_ci assert(((struct si_screen *)screen)->info.gfx_level <= GFX9); 2236bf215546Sopenharmony_ci 2237bf215546Sopenharmony_ci if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 2238bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_10_11_11; 2239bf215546Sopenharmony_ci 2240bf215546Sopenharmony_ci assert(first_non_void >= 0); 2241bf215546Sopenharmony_ci 2242bf215546Sopenharmony_ci if (desc->nr_channels == 4 && desc->channel[0].size == 10 && desc->channel[1].size == 10 && 2243bf215546Sopenharmony_ci desc->channel[2].size == 10 && desc->channel[3].size == 2) 2244bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 2245bf215546Sopenharmony_ci 2246bf215546Sopenharmony_ci /* See whether the components are of the same size. 
*/ 2247bf215546Sopenharmony_ci for (i = 0; i < desc->nr_channels; i++) { 2248bf215546Sopenharmony_ci if (desc->channel[first_non_void].size != desc->channel[i].size) 2249bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_INVALID; 2250bf215546Sopenharmony_ci } 2251bf215546Sopenharmony_ci 2252bf215546Sopenharmony_ci switch (desc->channel[first_non_void].size) { 2253bf215546Sopenharmony_ci case 8: 2254bf215546Sopenharmony_ci switch (desc->nr_channels) { 2255bf215546Sopenharmony_ci case 1: 2256bf215546Sopenharmony_ci case 3: /* 3 loads */ 2257bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_8; 2258bf215546Sopenharmony_ci case 2: 2259bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_8_8; 2260bf215546Sopenharmony_ci case 4: 2261bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 2262bf215546Sopenharmony_ci } 2263bf215546Sopenharmony_ci break; 2264bf215546Sopenharmony_ci case 16: 2265bf215546Sopenharmony_ci switch (desc->nr_channels) { 2266bf215546Sopenharmony_ci case 1: 2267bf215546Sopenharmony_ci case 3: /* 3 loads */ 2268bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_16; 2269bf215546Sopenharmony_ci case 2: 2270bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_16_16; 2271bf215546Sopenharmony_ci case 4: 2272bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 2273bf215546Sopenharmony_ci } 2274bf215546Sopenharmony_ci break; 2275bf215546Sopenharmony_ci case 32: 2276bf215546Sopenharmony_ci switch (desc->nr_channels) { 2277bf215546Sopenharmony_ci case 1: 2278bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_32; 2279bf215546Sopenharmony_ci case 2: 2280bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_32_32; 2281bf215546Sopenharmony_ci case 3: 2282bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_32_32_32; 2283bf215546Sopenharmony_ci case 4: 2284bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2285bf215546Sopenharmony_ci } 2286bf215546Sopenharmony_ci break; 
2287bf215546Sopenharmony_ci case 64: 2288bf215546Sopenharmony_ci /* Legacy double formats. */ 2289bf215546Sopenharmony_ci switch (desc->nr_channels) { 2290bf215546Sopenharmony_ci case 1: /* 1 load */ 2291bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_32_32; 2292bf215546Sopenharmony_ci case 2: /* 1 load */ 2293bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2294bf215546Sopenharmony_ci case 3: /* 3 loads */ 2295bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_32_32; 2296bf215546Sopenharmony_ci case 4: /* 2 loads */ 2297bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2298bf215546Sopenharmony_ci } 2299bf215546Sopenharmony_ci break; 2300bf215546Sopenharmony_ci } 2301bf215546Sopenharmony_ci 2302bf215546Sopenharmony_ci return V_008F0C_BUF_DATA_FORMAT_INVALID; 2303bf215546Sopenharmony_ci} 2304bf215546Sopenharmony_ci 2305bf215546Sopenharmony_cistatic uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 2306bf215546Sopenharmony_ci const struct util_format_description *desc, 2307bf215546Sopenharmony_ci int first_non_void) 2308bf215546Sopenharmony_ci{ 2309bf215546Sopenharmony_ci assert(((struct si_screen *)screen)->info.gfx_level <= GFX9); 2310bf215546Sopenharmony_ci 2311bf215546Sopenharmony_ci if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 2312bf215546Sopenharmony_ci return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2313bf215546Sopenharmony_ci 2314bf215546Sopenharmony_ci assert(first_non_void >= 0); 2315bf215546Sopenharmony_ci 2316bf215546Sopenharmony_ci switch (desc->channel[first_non_void].type) { 2317bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_SIGNED: 2318bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_FIXED: 2319bf215546Sopenharmony_ci if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer) 2320bf215546Sopenharmony_ci return V_008F0C_BUF_NUM_FORMAT_SINT; 2321bf215546Sopenharmony_ci else if (desc->channel[first_non_void].normalized) 2322bf215546Sopenharmony_ci return 
V_008F0C_BUF_NUM_FORMAT_SNORM; 2323bf215546Sopenharmony_ci else 2324bf215546Sopenharmony_ci return V_008F0C_BUF_NUM_FORMAT_SSCALED; 2325bf215546Sopenharmony_ci break; 2326bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_UNSIGNED: 2327bf215546Sopenharmony_ci if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer) 2328bf215546Sopenharmony_ci return V_008F0C_BUF_NUM_FORMAT_UINT; 2329bf215546Sopenharmony_ci else if (desc->channel[first_non_void].normalized) 2330bf215546Sopenharmony_ci return V_008F0C_BUF_NUM_FORMAT_UNORM; 2331bf215546Sopenharmony_ci else 2332bf215546Sopenharmony_ci return V_008F0C_BUF_NUM_FORMAT_USCALED; 2333bf215546Sopenharmony_ci break; 2334bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_FLOAT: 2335bf215546Sopenharmony_ci default: 2336bf215546Sopenharmony_ci return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2337bf215546Sopenharmony_ci } 2338bf215546Sopenharmony_ci} 2339bf215546Sopenharmony_ci 2340bf215546Sopenharmony_cistatic unsigned si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format, 2341bf215546Sopenharmony_ci unsigned usage) 2342bf215546Sopenharmony_ci{ 2343bf215546Sopenharmony_ci struct si_screen *sscreen = (struct si_screen *)screen; 2344bf215546Sopenharmony_ci const struct util_format_description *desc; 2345bf215546Sopenharmony_ci int first_non_void; 2346bf215546Sopenharmony_ci unsigned data_format; 2347bf215546Sopenharmony_ci 2348bf215546Sopenharmony_ci assert((usage & ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_VERTEX_BUFFER)) == 2349bf215546Sopenharmony_ci 0); 2350bf215546Sopenharmony_ci 2351bf215546Sopenharmony_ci desc = util_format_description(format); 2352bf215546Sopenharmony_ci 2353bf215546Sopenharmony_ci /* There are no native 8_8_8 or 16_16_16 data formats, and we currently 2354bf215546Sopenharmony_ci * select 8_8_8_8 and 16_16_16_16 instead. 
This works reasonably well 2355bf215546Sopenharmony_ci * for read-only access (with caveats surrounding bounds checks), but 2356bf215546Sopenharmony_ci * obviously fails for write access which we have to implement for 2357bf215546Sopenharmony_ci * shader images. Luckily, OpenGL doesn't expect this to be supported 2358bf215546Sopenharmony_ci * anyway, and so the only impact is on PBO uploads / downloads, which 2359bf215546Sopenharmony_ci * shouldn't be expected to be fast for GL_RGB anyway. 2360bf215546Sopenharmony_ci */ 2361bf215546Sopenharmony_ci if (desc->block.bits == 3 * 8 || desc->block.bits == 3 * 16) { 2362bf215546Sopenharmony_ci if (usage & (PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW)) { 2363bf215546Sopenharmony_ci usage &= ~(PIPE_BIND_SHADER_IMAGE | PIPE_BIND_SAMPLER_VIEW); 2364bf215546Sopenharmony_ci if (!usage) 2365bf215546Sopenharmony_ci return 0; 2366bf215546Sopenharmony_ci } 2367bf215546Sopenharmony_ci } 2368bf215546Sopenharmony_ci 2369bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX10) { 2370bf215546Sopenharmony_ci const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&sscreen->info)[format]; 2371bf215546Sopenharmony_ci if (!fmt->img_format || fmt->img_format >= 128) 2372bf215546Sopenharmony_ci return 0; 2373bf215546Sopenharmony_ci return usage; 2374bf215546Sopenharmony_ci } 2375bf215546Sopenharmony_ci 2376bf215546Sopenharmony_ci first_non_void = util_format_get_first_non_void_channel(format); 2377bf215546Sopenharmony_ci data_format = si_translate_buffer_dataformat(screen, desc, first_non_void); 2378bf215546Sopenharmony_ci if (data_format == V_008F0C_BUF_DATA_FORMAT_INVALID) 2379bf215546Sopenharmony_ci return 0; 2380bf215546Sopenharmony_ci 2381bf215546Sopenharmony_ci return usage; 2382bf215546Sopenharmony_ci} 2383bf215546Sopenharmony_ci 2384bf215546Sopenharmony_cistatic bool si_is_colorbuffer_format_supported(enum amd_gfx_level gfx_level, 2385bf215546Sopenharmony_ci enum pipe_format format) 2386bf215546Sopenharmony_ci{ 
2387bf215546Sopenharmony_ci return si_translate_colorformat(gfx_level, format) != V_028C70_COLOR_INVALID && 2388bf215546Sopenharmony_ci si_translate_colorswap(gfx_level, format, false) != ~0U; 2389bf215546Sopenharmony_ci} 2390bf215546Sopenharmony_ci 2391bf215546Sopenharmony_cistatic bool si_is_zs_format_supported(enum pipe_format format) 2392bf215546Sopenharmony_ci{ 2393bf215546Sopenharmony_ci return si_translate_dbformat(format) != V_028040_Z_INVALID; 2394bf215546Sopenharmony_ci} 2395bf215546Sopenharmony_ci 2396bf215546Sopenharmony_cistatic bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format format, 2397bf215546Sopenharmony_ci enum pipe_texture_target target, unsigned sample_count, 2398bf215546Sopenharmony_ci unsigned storage_sample_count, unsigned usage) 2399bf215546Sopenharmony_ci{ 2400bf215546Sopenharmony_ci struct si_screen *sscreen = (struct si_screen *)screen; 2401bf215546Sopenharmony_ci unsigned retval = 0; 2402bf215546Sopenharmony_ci 2403bf215546Sopenharmony_ci if (target >= PIPE_MAX_TEXTURE_TYPES) { 2404bf215546Sopenharmony_ci PRINT_ERR("radeonsi: unsupported texture type %d\n", target); 2405bf215546Sopenharmony_ci return false; 2406bf215546Sopenharmony_ci } 2407bf215546Sopenharmony_ci 2408bf215546Sopenharmony_ci /* Require PIPE_BIND_SAMPLER_VIEW support when PIPE_BIND_RENDER_TARGET 2409bf215546Sopenharmony_ci * is requested. 
2410bf215546Sopenharmony_ci */ 2411bf215546Sopenharmony_ci if (usage & PIPE_BIND_RENDER_TARGET) 2412bf215546Sopenharmony_ci usage |= PIPE_BIND_SAMPLER_VIEW; 2413bf215546Sopenharmony_ci 2414bf215546Sopenharmony_ci if ((target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE) && 2415bf215546Sopenharmony_ci !sscreen->info.has_3d_cube_border_color_mipmap) 2416bf215546Sopenharmony_ci return false; 2417bf215546Sopenharmony_ci 2418bf215546Sopenharmony_ci if (util_format_get_num_planes(format) >= 2) 2419bf215546Sopenharmony_ci return false; 2420bf215546Sopenharmony_ci 2421bf215546Sopenharmony_ci if (MAX2(1, sample_count) < MAX2(1, storage_sample_count)) 2422bf215546Sopenharmony_ci return false; 2423bf215546Sopenharmony_ci 2424bf215546Sopenharmony_ci if (sample_count > 1) { 2425bf215546Sopenharmony_ci if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE)) 2426bf215546Sopenharmony_ci return false; 2427bf215546Sopenharmony_ci 2428bf215546Sopenharmony_ci /* Only power-of-two sample counts are supported. */ 2429bf215546Sopenharmony_ci if (!util_is_power_of_two_or_zero(sample_count) || 2430bf215546Sopenharmony_ci !util_is_power_of_two_or_zero(storage_sample_count)) 2431bf215546Sopenharmony_ci return false; 2432bf215546Sopenharmony_ci 2433bf215546Sopenharmony_ci /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate, 2434bf215546Sopenharmony_ci * so don't expose 16 samples there. 2435bf215546Sopenharmony_ci */ 2436bf215546Sopenharmony_ci const unsigned max_eqaa_samples = util_bitcount(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16; 2437bf215546Sopenharmony_ci const unsigned max_samples = 8; 2438bf215546Sopenharmony_ci 2439bf215546Sopenharmony_ci /* MSAA support without framebuffer attachments. 
*/ 2440bf215546Sopenharmony_ci if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples) 2441bf215546Sopenharmony_ci return true; 2442bf215546Sopenharmony_ci 2443bf215546Sopenharmony_ci if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) { 2444bf215546Sopenharmony_ci /* Color without EQAA or depth/stencil. */ 2445bf215546Sopenharmony_ci if (sample_count > max_samples || sample_count != storage_sample_count) 2446bf215546Sopenharmony_ci return false; 2447bf215546Sopenharmony_ci } else { 2448bf215546Sopenharmony_ci /* Color with EQAA. */ 2449bf215546Sopenharmony_ci if (sample_count > max_eqaa_samples || storage_sample_count > max_samples) 2450bf215546Sopenharmony_ci return false; 2451bf215546Sopenharmony_ci } 2452bf215546Sopenharmony_ci } 2453bf215546Sopenharmony_ci 2454bf215546Sopenharmony_ci if (usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) { 2455bf215546Sopenharmony_ci if (target == PIPE_BUFFER) { 2456bf215546Sopenharmony_ci retval |= si_is_vertex_format_supported( 2457bf215546Sopenharmony_ci screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)); 2458bf215546Sopenharmony_ci } else { 2459bf215546Sopenharmony_ci if (si_is_sampler_format_supported(screen, format)) 2460bf215546Sopenharmony_ci retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE); 2461bf215546Sopenharmony_ci } 2462bf215546Sopenharmony_ci } 2463bf215546Sopenharmony_ci 2464bf215546Sopenharmony_ci if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | 2465bf215546Sopenharmony_ci PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) && 2466bf215546Sopenharmony_ci si_is_colorbuffer_format_supported(sscreen->info.gfx_level, format)) { 2467bf215546Sopenharmony_ci retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | 2468bf215546Sopenharmony_ci PIPE_BIND_SHARED); 2469bf215546Sopenharmony_ci if (!util_format_is_pure_integer(format) && 
!util_format_is_depth_or_stencil(format)) 2470bf215546Sopenharmony_ci retval |= usage & PIPE_BIND_BLENDABLE; 2471bf215546Sopenharmony_ci } 2472bf215546Sopenharmony_ci 2473bf215546Sopenharmony_ci if ((usage & PIPE_BIND_DEPTH_STENCIL) && si_is_zs_format_supported(format)) { 2474bf215546Sopenharmony_ci retval |= PIPE_BIND_DEPTH_STENCIL; 2475bf215546Sopenharmony_ci } 2476bf215546Sopenharmony_ci 2477bf215546Sopenharmony_ci if (usage & PIPE_BIND_VERTEX_BUFFER) { 2478bf215546Sopenharmony_ci retval |= si_is_vertex_format_supported(screen, format, PIPE_BIND_VERTEX_BUFFER); 2479bf215546Sopenharmony_ci } 2480bf215546Sopenharmony_ci 2481bf215546Sopenharmony_ci if (usage & PIPE_BIND_INDEX_BUFFER) { 2482bf215546Sopenharmony_ci if (format == PIPE_FORMAT_R8_UINT || 2483bf215546Sopenharmony_ci format == PIPE_FORMAT_R16_UINT || 2484bf215546Sopenharmony_ci format == PIPE_FORMAT_R32_UINT) 2485bf215546Sopenharmony_ci retval |= PIPE_BIND_INDEX_BUFFER; 2486bf215546Sopenharmony_ci } 2487bf215546Sopenharmony_ci 2488bf215546Sopenharmony_ci if ((usage & PIPE_BIND_LINEAR) && !util_format_is_compressed(format) && 2489bf215546Sopenharmony_ci !(usage & PIPE_BIND_DEPTH_STENCIL)) 2490bf215546Sopenharmony_ci retval |= PIPE_BIND_LINEAR; 2491bf215546Sopenharmony_ci 2492bf215546Sopenharmony_ci return retval == usage; 2493bf215546Sopenharmony_ci} 2494bf215546Sopenharmony_ci 2495bf215546Sopenharmony_ci/* 2496bf215546Sopenharmony_ci * framebuffer handling 2497bf215546Sopenharmony_ci */ 2498bf215546Sopenharmony_ci 2499bf215546Sopenharmony_cistatic void si_choose_spi_color_formats(struct si_surface *surf, unsigned format, unsigned swap, 2500bf215546Sopenharmony_ci unsigned ntype, bool is_depth) 2501bf215546Sopenharmony_ci{ 2502bf215546Sopenharmony_ci struct ac_spi_color_formats formats = {}; 2503bf215546Sopenharmony_ci 2504bf215546Sopenharmony_ci ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, &formats); 2505bf215546Sopenharmony_ci 2506bf215546Sopenharmony_ci surf->spi_shader_col_format = 
formats.normal; 2507bf215546Sopenharmony_ci surf->spi_shader_col_format_alpha = formats.alpha; 2508bf215546Sopenharmony_ci surf->spi_shader_col_format_blend = formats.blend; 2509bf215546Sopenharmony_ci surf->spi_shader_col_format_blend_alpha = formats.blend_alpha; 2510bf215546Sopenharmony_ci} 2511bf215546Sopenharmony_ci 2512bf215546Sopenharmony_cistatic void si_initialize_color_surface(struct si_context *sctx, struct si_surface *surf) 2513bf215546Sopenharmony_ci{ 2514bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)surf->base.texture; 2515bf215546Sopenharmony_ci unsigned color_info, color_attrib; 2516bf215546Sopenharmony_ci unsigned format, swap, ntype, endian; 2517bf215546Sopenharmony_ci const struct util_format_description *desc; 2518bf215546Sopenharmony_ci int firstchan; 2519bf215546Sopenharmony_ci unsigned blend_clamp = 0, blend_bypass = 0; 2520bf215546Sopenharmony_ci 2521bf215546Sopenharmony_ci desc = util_format_description(surf->base.format); 2522bf215546Sopenharmony_ci for (firstchan = 0; firstchan < 4; firstchan++) { 2523bf215546Sopenharmony_ci if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) { 2524bf215546Sopenharmony_ci break; 2525bf215546Sopenharmony_ci } 2526bf215546Sopenharmony_ci } 2527bf215546Sopenharmony_ci if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) { 2528bf215546Sopenharmony_ci ntype = V_028C70_NUMBER_FLOAT; 2529bf215546Sopenharmony_ci } else { 2530bf215546Sopenharmony_ci ntype = V_028C70_NUMBER_UNORM; 2531bf215546Sopenharmony_ci if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 2532bf215546Sopenharmony_ci ntype = V_028C70_NUMBER_SRGB; 2533bf215546Sopenharmony_ci else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) { 2534bf215546Sopenharmony_ci if (desc->channel[firstchan].pure_integer) { 2535bf215546Sopenharmony_ci ntype = V_028C70_NUMBER_SINT; 2536bf215546Sopenharmony_ci } else { 2537bf215546Sopenharmony_ci assert(desc->channel[firstchan].normalized); 
2538bf215546Sopenharmony_ci ntype = V_028C70_NUMBER_SNORM; 2539bf215546Sopenharmony_ci } 2540bf215546Sopenharmony_ci } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) { 2541bf215546Sopenharmony_ci if (desc->channel[firstchan].pure_integer) { 2542bf215546Sopenharmony_ci ntype = V_028C70_NUMBER_UINT; 2543bf215546Sopenharmony_ci } else { 2544bf215546Sopenharmony_ci assert(desc->channel[firstchan].normalized); 2545bf215546Sopenharmony_ci ntype = V_028C70_NUMBER_UNORM; 2546bf215546Sopenharmony_ci } 2547bf215546Sopenharmony_ci } 2548bf215546Sopenharmony_ci } 2549bf215546Sopenharmony_ci 2550bf215546Sopenharmony_ci format = si_translate_colorformat(sctx->gfx_level, surf->base.format); 2551bf215546Sopenharmony_ci if (format == V_028C70_COLOR_INVALID) { 2552bf215546Sopenharmony_ci PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format); 2553bf215546Sopenharmony_ci } 2554bf215546Sopenharmony_ci assert(format != V_028C70_COLOR_INVALID); 2555bf215546Sopenharmony_ci swap = si_translate_colorswap(sctx->gfx_level, surf->base.format, false); 2556bf215546Sopenharmony_ci endian = si_colorformat_endian_swap(format); 2557bf215546Sopenharmony_ci 2558bf215546Sopenharmony_ci /* blend clamp should be set for all NORM/SRGB types */ 2559bf215546Sopenharmony_ci if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || 2560bf215546Sopenharmony_ci ntype == V_028C70_NUMBER_SRGB) 2561bf215546Sopenharmony_ci blend_clamp = 1; 2562bf215546Sopenharmony_ci 2563bf215546Sopenharmony_ci /* set blend bypass according to docs if SINT/UINT or 2564bf215546Sopenharmony_ci 8/24 COLOR variants */ 2565bf215546Sopenharmony_ci if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || 2566bf215546Sopenharmony_ci format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 || 2567bf215546Sopenharmony_ci format == V_028C70_COLOR_X24_8_32_FLOAT) { 2568bf215546Sopenharmony_ci blend_clamp = 0; 2569bf215546Sopenharmony_ci blend_bypass = 1; 
2570bf215546Sopenharmony_ci } 2571bf215546Sopenharmony_ci 2572bf215546Sopenharmony_ci if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) { 2573bf215546Sopenharmony_ci if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_8_8 || 2574bf215546Sopenharmony_ci format == V_028C70_COLOR_8_8_8_8) 2575bf215546Sopenharmony_ci surf->color_is_int8 = true; 2576bf215546Sopenharmony_ci else if (format == V_028C70_COLOR_10_10_10_2 || format == V_028C70_COLOR_2_10_10_10) 2577bf215546Sopenharmony_ci surf->color_is_int10 = true; 2578bf215546Sopenharmony_ci } 2579bf215546Sopenharmony_ci 2580bf215546Sopenharmony_ci color_info = 2581bf215546Sopenharmony_ci S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) | 2582bf215546Sopenharmony_ci S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) | 2583bf215546Sopenharmony_ci S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM && 2584bf215546Sopenharmony_ci ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 && 2585bf215546Sopenharmony_ci format != V_028C70_COLOR_24_8) | 2586bf215546Sopenharmony_ci S_028C70_NUMBER_TYPE(ntype); 2587bf215546Sopenharmony_ci 2588bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 2589bf215546Sopenharmony_ci assert(!SI_BIG_ENDIAN); 2590bf215546Sopenharmony_ci color_info |= S_028C70_FORMAT_GFX11(format); 2591bf215546Sopenharmony_ci } else { 2592bf215546Sopenharmony_ci color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian); 2593bf215546Sopenharmony_ci } 2594bf215546Sopenharmony_ci 2595bf215546Sopenharmony_ci /* Intensity is implemented as Red, so treat it that way. */ 2596bf215546Sopenharmony_ci color_attrib = sctx->gfx_level >= GFX11 ? 
2597bf215546Sopenharmony_ci S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1 || util_format_is_intensity(surf->base.format)): 2598bf215546Sopenharmony_ci S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1 || util_format_is_intensity(surf->base.format)); 2599bf215546Sopenharmony_ci 2600bf215546Sopenharmony_ci if (tex->buffer.b.b.nr_samples > 1) { 2601bf215546Sopenharmony_ci unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples); 2602bf215546Sopenharmony_ci unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples); 2603bf215546Sopenharmony_ci 2604bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 2605bf215546Sopenharmony_ci color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_fragments); 2606bf215546Sopenharmony_ci } else { 2607bf215546Sopenharmony_ci color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_fragments); 2608bf215546Sopenharmony_ci 2609bf215546Sopenharmony_ci if (tex->surface.fmask_offset) { 2610bf215546Sopenharmony_ci color_info |= S_028C70_COMPRESSION(1); 2611bf215546Sopenharmony_ci unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.color.fmask.bankh); 2612bf215546Sopenharmony_ci 2613bf215546Sopenharmony_ci if (sctx->gfx_level == GFX6) { 2614bf215546Sopenharmony_ci /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */ 2615bf215546Sopenharmony_ci color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh); 2616bf215546Sopenharmony_ci } 2617bf215546Sopenharmony_ci } 2618bf215546Sopenharmony_ci } 2619bf215546Sopenharmony_ci } 2620bf215546Sopenharmony_ci 2621bf215546Sopenharmony_ci /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and 2622bf215546Sopenharmony_ci * 64 for APU because all of our APUs to date use DIMMs which have 2623bf215546Sopenharmony_ci * a request granularity size of 64B while all other chips have a 2624bf215546Sopenharmony_ci * 32B request size */ 2625bf215546Sopenharmony_ci unsigned min_compressed_block_size = 
V_028C78_MIN_BLOCK_SIZE_32B; 2626bf215546Sopenharmony_ci if (!sctx->screen->info.has_dedicated_vram) 2627bf215546Sopenharmony_ci min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B; 2628bf215546Sopenharmony_ci 2629bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 2630bf215546Sopenharmony_ci surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | 2631bf215546Sopenharmony_ci S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) | 2632bf215546Sopenharmony_ci S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | 2633bf215546Sopenharmony_ci S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_64B_blocks); 2634bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) 2635bf215546Sopenharmony_ci surf->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(tex->surface.u.gfx9.color.dcc.independent_128B_blocks); 2636bf215546Sopenharmony_ci else 2637bf215546Sopenharmony_ci surf->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(tex->surface.u.gfx9.color.dcc.independent_128B_blocks); 2638bf215546Sopenharmony_ci } else if (sctx->gfx_level >= GFX8) { 2639bf215546Sopenharmony_ci unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B; 2640bf215546Sopenharmony_ci 2641bf215546Sopenharmony_ci if (tex->buffer.b.b.nr_storage_samples > 1) { 2642bf215546Sopenharmony_ci if (tex->surface.bpe == 1) 2643bf215546Sopenharmony_ci max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 2644bf215546Sopenharmony_ci else if (tex->surface.bpe == 2) 2645bf215546Sopenharmony_ci max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; 2646bf215546Sopenharmony_ci } 2647bf215546Sopenharmony_ci 2648bf215546Sopenharmony_ci surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) | 2649bf215546Sopenharmony_ci S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) | 2650bf215546Sopenharmony_ci S_028C78_INDEPENDENT_64B_BLOCKS(1); 
2651bf215546Sopenharmony_ci } 2652bf215546Sopenharmony_ci 2653bf215546Sopenharmony_ci /* This must be set for fast clear to work without FMASK. */ 2654bf215546Sopenharmony_ci if (!tex->surface.fmask_size && sctx->gfx_level == GFX6) { 2655bf215546Sopenharmony_ci unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh); 2656bf215546Sopenharmony_ci color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh); 2657bf215546Sopenharmony_ci } 2658bf215546Sopenharmony_ci 2659bf215546Sopenharmony_ci /* GFX10 field has the same base shift as the GFX6 field */ 2660bf215546Sopenharmony_ci unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) | 2661bf215546Sopenharmony_ci S_028C6C_SLICE_MAX_GFX10(surf->base.u.tex.last_layer); 2662bf215546Sopenharmony_ci unsigned mip0_width = surf->width0 - 1; 2663bf215546Sopenharmony_ci unsigned mip0_height = surf->height0 - 1; 2664bf215546Sopenharmony_ci unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0); 2665bf215546Sopenharmony_ci 2666bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 2667bf215546Sopenharmony_ci color_view |= S_028C6C_MIP_LEVEL_GFX10(surf->base.u.tex.level); 2668bf215546Sopenharmony_ci 2669bf215546Sopenharmony_ci surf->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(mip0_depth) | 2670bf215546Sopenharmony_ci S_028EE0_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type) | 2671bf215546Sopenharmony_ci S_028EE0_RESOURCE_LEVEL(sctx->gfx_level >= GFX11 ? 
0 : 1); 2672bf215546Sopenharmony_ci } else if (sctx->gfx_level == GFX9) { 2673bf215546Sopenharmony_ci color_view |= S_028C6C_MIP_LEVEL_GFX9(surf->base.u.tex.level); 2674bf215546Sopenharmony_ci color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | 2675bf215546Sopenharmony_ci S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type); 2676bf215546Sopenharmony_ci } 2677bf215546Sopenharmony_ci 2678bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX9) { 2679bf215546Sopenharmony_ci surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(mip0_width) | 2680bf215546Sopenharmony_ci S_028C68_MIP0_HEIGHT(mip0_height) | 2681bf215546Sopenharmony_ci S_028C68_MAX_MIP(tex->buffer.b.b.last_level); 2682bf215546Sopenharmony_ci } 2683bf215546Sopenharmony_ci 2684bf215546Sopenharmony_ci surf->cb_color_view = color_view; 2685bf215546Sopenharmony_ci surf->cb_color_info = color_info; 2686bf215546Sopenharmony_ci surf->cb_color_attrib = color_attrib; 2687bf215546Sopenharmony_ci 2688bf215546Sopenharmony_ci /* Determine pixel shader export format */ 2689bf215546Sopenharmony_ci si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth); 2690bf215546Sopenharmony_ci 2691bf215546Sopenharmony_ci surf->color_initialized = true; 2692bf215546Sopenharmony_ci} 2693bf215546Sopenharmony_ci 2694bf215546Sopenharmony_cistatic void si_init_depth_surface(struct si_context *sctx, struct si_surface *surf) 2695bf215546Sopenharmony_ci{ 2696bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)surf->base.texture; 2697bf215546Sopenharmony_ci unsigned level = surf->base.u.tex.level; 2698bf215546Sopenharmony_ci unsigned format, stencil_format; 2699bf215546Sopenharmony_ci uint32_t z_info, s_info; 2700bf215546Sopenharmony_ci 2701bf215546Sopenharmony_ci format = si_translate_dbformat(tex->db_render_format); 2702bf215546Sopenharmony_ci stencil_format = tex->surface.has_stencil ? 
V_028044_STENCIL_8 : V_028044_STENCIL_INVALID; 2703bf215546Sopenharmony_ci 2704bf215546Sopenharmony_ci assert(format != V_028040_Z_INVALID); 2705bf215546Sopenharmony_ci if (format == V_028040_Z_INVALID) 2706bf215546Sopenharmony_ci PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format); 2707bf215546Sopenharmony_ci 2708bf215546Sopenharmony_ci surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2709bf215546Sopenharmony_ci S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2710bf215546Sopenharmony_ci surf->db_htile_data_base = 0; 2711bf215546Sopenharmony_ci surf->db_htile_surface = 0; 2712bf215546Sopenharmony_ci 2713bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 2714bf215546Sopenharmony_ci surf->db_depth_view |= S_028008_SLICE_START_HI(surf->base.u.tex.first_layer >> 11) | 2715bf215546Sopenharmony_ci S_028008_SLICE_MAX_HI(surf->base.u.tex.last_layer >> 11); 2716bf215546Sopenharmony_ci } 2717bf215546Sopenharmony_ci 2718bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX9) { 2719bf215546Sopenharmony_ci assert(tex->surface.u.gfx9.surf_offset == 0); 2720bf215546Sopenharmony_ci surf->db_depth_base = tex->buffer.gpu_address >> 8; 2721bf215546Sopenharmony_ci surf->db_stencil_base = (tex->buffer.gpu_address + tex->surface.u.gfx9.zs.stencil_offset) >> 8; 2722bf215546Sopenharmony_ci z_info = S_028038_FORMAT(format) | 2723bf215546Sopenharmony_ci S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) | 2724bf215546Sopenharmony_ci S_028038_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 2725bf215546Sopenharmony_ci S_028038_MAXMIP(tex->buffer.b.b.last_level) | 2726bf215546Sopenharmony_ci S_028040_ITERATE_256(sctx->gfx_level >= GFX11); 2727bf215546Sopenharmony_ci s_info = S_02803C_FORMAT(stencil_format) | 2728bf215546Sopenharmony_ci S_02803C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode) | 2729bf215546Sopenharmony_ci S_028044_ITERATE_256(sctx->gfx_level >= GFX11); 2730bf215546Sopenharmony_ci 2731bf215546Sopenharmony_ci if 
(sctx->gfx_level == GFX9) { 2732bf215546Sopenharmony_ci surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.epitch); 2733bf215546Sopenharmony_ci surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.zs.stencil_epitch); 2734bf215546Sopenharmony_ci } 2735bf215546Sopenharmony_ci surf->db_depth_view |= S_028008_MIPID(level); 2736bf215546Sopenharmony_ci surf->db_depth_size = 2737bf215546Sopenharmony_ci S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) | S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1); 2738bf215546Sopenharmony_ci 2739bf215546Sopenharmony_ci if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) { 2740bf215546Sopenharmony_ci z_info |= S_028038_TILE_SURFACE_ENABLE(1) | S_028038_ALLOW_EXPCLEAR(1); 2741bf215546Sopenharmony_ci s_info |= S_02803C_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled); 2742bf215546Sopenharmony_ci 2743bf215546Sopenharmony_ci if (tex->surface.has_stencil && !tex->htile_stencil_disabled) { 2744bf215546Sopenharmony_ci /* Stencil buffer workaround ported from the GFX6-GFX8 code. 2745bf215546Sopenharmony_ci * See that for explanation. 
2746bf215546Sopenharmony_ci */ 2747bf215546Sopenharmony_ci s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1); 2748bf215546Sopenharmony_ci } 2749bf215546Sopenharmony_ci 2750bf215546Sopenharmony_ci surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 2751bf215546Sopenharmony_ci surf->db_htile_surface = 2752bf215546Sopenharmony_ci S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); 2753bf215546Sopenharmony_ci if (sctx->gfx_level == GFX9) { 2754bf215546Sopenharmony_ci surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1); 2755bf215546Sopenharmony_ci } 2756bf215546Sopenharmony_ci } 2757bf215546Sopenharmony_ci } else { 2758bf215546Sopenharmony_ci /* GFX6-GFX8 */ 2759bf215546Sopenharmony_ci struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level]; 2760bf215546Sopenharmony_ci 2761bf215546Sopenharmony_ci assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2762bf215546Sopenharmony_ci 2763bf215546Sopenharmony_ci surf->db_depth_base = 2764bf215546Sopenharmony_ci (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.level[level].offset_256B; 2765bf215546Sopenharmony_ci surf->db_stencil_base = 2766bf215546Sopenharmony_ci (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.zs.stencil_level[level].offset_256B; 2767bf215546Sopenharmony_ci 2768bf215546Sopenharmony_ci z_info = 2769bf215546Sopenharmony_ci S_028040_FORMAT(format) | S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)); 2770bf215546Sopenharmony_ci s_info = S_028044_FORMAT(stencil_format); 2771bf215546Sopenharmony_ci surf->db_depth_info = 0; 2772bf215546Sopenharmony_ci 2773bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX7) { 2774bf215546Sopenharmony_ci struct radeon_info *info = &sctx->screen->info; 2775bf215546Sopenharmony_ci unsigned index = tex->surface.u.legacy.tiling_index[level]; 2776bf215546Sopenharmony_ci unsigned stencil_index = tex->surface.u.legacy.zs.stencil_tiling_index[level]; 2777bf215546Sopenharmony_ci unsigned 
macro_index = tex->surface.u.legacy.macro_tile_index; 2778bf215546Sopenharmony_ci unsigned tile_mode = info->si_tile_mode_array[index]; 2779bf215546Sopenharmony_ci unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 2780bf215546Sopenharmony_ci unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 2781bf215546Sopenharmony_ci 2782bf215546Sopenharmony_ci surf->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 2783bf215546Sopenharmony_ci S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 2784bf215546Sopenharmony_ci S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 2785bf215546Sopenharmony_ci S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 2786bf215546Sopenharmony_ci S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 2787bf215546Sopenharmony_ci S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 2788bf215546Sopenharmony_ci z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 2789bf215546Sopenharmony_ci s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 2790bf215546Sopenharmony_ci } else { 2791bf215546Sopenharmony_ci unsigned tile_mode_index = si_tile_mode_index(tex, level, false); 2792bf215546Sopenharmony_ci z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2793bf215546Sopenharmony_ci tile_mode_index = si_tile_mode_index(tex, level, true); 2794bf215546Sopenharmony_ci s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2795bf215546Sopenharmony_ci } 2796bf215546Sopenharmony_ci 2797bf215546Sopenharmony_ci surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2798bf215546Sopenharmony_ci S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2799bf215546Sopenharmony_ci surf->db_depth_slice = 2800bf215546Sopenharmony_ci S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * levelinfo->nblk_y) / 64 - 1); 2801bf215546Sopenharmony_ci 2802bf215546Sopenharmony_ci if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) { 2803bf215546Sopenharmony_ci z_info 
|= S_028040_TILE_SURFACE_ENABLE(1) | S_028040_ALLOW_EXPCLEAR(1); 2804bf215546Sopenharmony_ci s_info |= S_028044_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled); 2805bf215546Sopenharmony_ci 2806bf215546Sopenharmony_ci if (tex->surface.has_stencil) { 2807bf215546Sopenharmony_ci /* Workaround: For a not yet understood reason, the 2808bf215546Sopenharmony_ci * combination of MSAA, fast stencil clear and stencil 2809bf215546Sopenharmony_ci * decompress messes with subsequent stencil buffer 2810bf215546Sopenharmony_ci * uses. Problem was reproduced on Verde, Bonaire, 2811bf215546Sopenharmony_ci * Tonga, and Carrizo. 2812bf215546Sopenharmony_ci * 2813bf215546Sopenharmony_ci * Disabling EXPCLEAR works around the problem. 2814bf215546Sopenharmony_ci * 2815bf215546Sopenharmony_ci * Check piglit's arb_texture_multisample-stencil-clear 2816bf215546Sopenharmony_ci * test if you want to try changing this. 2817bf215546Sopenharmony_ci */ 2818bf215546Sopenharmony_ci if (tex->buffer.b.b.nr_samples <= 1) 2819bf215546Sopenharmony_ci s_info |= S_028044_ALLOW_EXPCLEAR(1); 2820bf215546Sopenharmony_ci } 2821bf215546Sopenharmony_ci 2822bf215546Sopenharmony_ci surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 2823bf215546Sopenharmony_ci surf->db_htile_surface = S_028ABC_FULL_CACHE(1); 2824bf215546Sopenharmony_ci } 2825bf215546Sopenharmony_ci } 2826bf215546Sopenharmony_ci 2827bf215546Sopenharmony_ci surf->db_z_info = z_info; 2828bf215546Sopenharmony_ci surf->db_stencil_info = s_info; 2829bf215546Sopenharmony_ci 2830bf215546Sopenharmony_ci surf->depth_initialized = true; 2831bf215546Sopenharmony_ci} 2832bf215546Sopenharmony_ci 2833bf215546Sopenharmony_civoid si_set_sampler_depth_decompress_mask(struct si_context *sctx, struct si_texture *tex) 2834bf215546Sopenharmony_ci{ 2835bf215546Sopenharmony_ci /* Check all sampler bindings in all shaders where depth textures are bound, and update 2836bf215546Sopenharmony_ci * which samplers should be decompressed. 
2837bf215546Sopenharmony_ci */ 2838bf215546Sopenharmony_ci u_foreach_bit(sh, sctx->shader_has_depth_tex) { 2839bf215546Sopenharmony_ci u_foreach_bit(i, sctx->samplers[sh].has_depth_tex_mask) { 2840bf215546Sopenharmony_ci if (sctx->samplers[sh].views[i]->texture == &tex->buffer.b.b) { 2841bf215546Sopenharmony_ci sctx->samplers[sh].needs_depth_decompress_mask |= 1 << i; 2842bf215546Sopenharmony_ci sctx->shader_needs_decompress_mask |= 1 << sh; 2843bf215546Sopenharmony_ci } 2844bf215546Sopenharmony_ci } 2845bf215546Sopenharmony_ci } 2846bf215546Sopenharmony_ci} 2847bf215546Sopenharmony_ci 2848bf215546Sopenharmony_civoid si_update_fb_dirtiness_after_rendering(struct si_context *sctx) 2849bf215546Sopenharmony_ci{ 2850bf215546Sopenharmony_ci if (sctx->decompression_enabled) 2851bf215546Sopenharmony_ci return; 2852bf215546Sopenharmony_ci 2853bf215546Sopenharmony_ci if (sctx->framebuffer.state.zsbuf) { 2854bf215546Sopenharmony_ci struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; 2855bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)surf->texture; 2856bf215546Sopenharmony_ci 2857bf215546Sopenharmony_ci tex->dirty_level_mask |= 1 << surf->u.tex.level; 2858bf215546Sopenharmony_ci 2859bf215546Sopenharmony_ci if (tex->surface.has_stencil) 2860bf215546Sopenharmony_ci tex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; 2861bf215546Sopenharmony_ci 2862bf215546Sopenharmony_ci si_set_sampler_depth_decompress_mask(sctx, tex); 2863bf215546Sopenharmony_ci } 2864bf215546Sopenharmony_ci 2865bf215546Sopenharmony_ci unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask; 2866bf215546Sopenharmony_ci while (compressed_cb_mask) { 2867bf215546Sopenharmony_ci unsigned i = u_bit_scan(&compressed_cb_mask); 2868bf215546Sopenharmony_ci struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2869bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)surf->texture; 2870bf215546Sopenharmony_ci 2871bf215546Sopenharmony_ci if 
(tex->surface.fmask_offset) { 2872bf215546Sopenharmony_ci tex->dirty_level_mask |= 1 << surf->u.tex.level; 2873bf215546Sopenharmony_ci tex->fmask_is_identity = false; 2874bf215546Sopenharmony_ci } 2875bf215546Sopenharmony_ci } 2876bf215546Sopenharmony_ci} 2877bf215546Sopenharmony_ci 2878bf215546Sopenharmony_cistatic void si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2879bf215546Sopenharmony_ci{ 2880bf215546Sopenharmony_ci for (int i = 0; i < state->nr_cbufs; ++i) { 2881bf215546Sopenharmony_ci struct si_surface *surf = NULL; 2882bf215546Sopenharmony_ci struct si_texture *tex; 2883bf215546Sopenharmony_ci 2884bf215546Sopenharmony_ci if (!state->cbufs[i]) 2885bf215546Sopenharmony_ci continue; 2886bf215546Sopenharmony_ci surf = (struct si_surface *)state->cbufs[i]; 2887bf215546Sopenharmony_ci tex = (struct si_texture *)surf->base.texture; 2888bf215546Sopenharmony_ci 2889bf215546Sopenharmony_ci p_atomic_dec(&tex->framebuffers_bound); 2890bf215546Sopenharmony_ci } 2891bf215546Sopenharmony_ci} 2892bf215546Sopenharmony_ci 2893bf215546Sopenharmony_civoid si_mark_display_dcc_dirty(struct si_context *sctx, struct si_texture *tex) 2894bf215546Sopenharmony_ci{ 2895bf215546Sopenharmony_ci if (!tex->surface.display_dcc_offset || tex->displayable_dcc_dirty) 2896bf215546Sopenharmony_ci return; 2897bf215546Sopenharmony_ci 2898bf215546Sopenharmony_ci if (!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) { 2899bf215546Sopenharmony_ci struct hash_entry *entry = _mesa_hash_table_search(sctx->dirty_implicit_resources, tex); 2900bf215546Sopenharmony_ci if (!entry) { 2901bf215546Sopenharmony_ci struct pipe_resource *dummy = NULL; 2902bf215546Sopenharmony_ci pipe_resource_reference(&dummy, &tex->buffer.b.b); 2903bf215546Sopenharmony_ci _mesa_hash_table_insert(sctx->dirty_implicit_resources, tex, tex); 2904bf215546Sopenharmony_ci } 2905bf215546Sopenharmony_ci } 2906bf215546Sopenharmony_ci tex->displayable_dcc_dirty = true; 2907bf215546Sopenharmony_ci} 
2908bf215546Sopenharmony_ci 2909bf215546Sopenharmony_cistatic void si_update_display_dcc_dirty(struct si_context *sctx) 2910bf215546Sopenharmony_ci{ 2911bf215546Sopenharmony_ci const struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2912bf215546Sopenharmony_ci 2913bf215546Sopenharmony_ci for (unsigned i = 0; i < state->nr_cbufs; i++) { 2914bf215546Sopenharmony_ci if (state->cbufs[i]) 2915bf215546Sopenharmony_ci si_mark_display_dcc_dirty(sctx, (struct si_texture *)state->cbufs[i]->texture); 2916bf215546Sopenharmony_ci } 2917bf215546Sopenharmony_ci} 2918bf215546Sopenharmony_ci 2919bf215546Sopenharmony_cistatic void si_set_framebuffer_state(struct pipe_context *ctx, 2920bf215546Sopenharmony_ci const struct pipe_framebuffer_state *state) 2921bf215546Sopenharmony_ci{ 2922bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 2923bf215546Sopenharmony_ci struct si_surface *surf = NULL; 2924bf215546Sopenharmony_ci struct si_texture *tex; 2925bf215546Sopenharmony_ci bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2926bf215546Sopenharmony_ci unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2927bf215546Sopenharmony_ci unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit; 2928bf215546Sopenharmony_ci bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf; 2929bf215546Sopenharmony_ci bool old_has_stencil = 2930bf215546Sopenharmony_ci old_has_zsbuf && 2931bf215546Sopenharmony_ci ((struct si_texture *)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil; 2932bf215546Sopenharmony_ci int i; 2933bf215546Sopenharmony_ci 2934bf215546Sopenharmony_ci /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs 2935bf215546Sopenharmony_ci * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. 2936bf215546Sopenharmony_ci * We could implement the full workaround here, but it's a useless case. 
2937bf215546Sopenharmony_ci */ 2938bf215546Sopenharmony_ci if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) { 2939bf215546Sopenharmony_ci unreachable("the framebuffer shouldn't have zero area"); 2940bf215546Sopenharmony_ci return; 2941bf215546Sopenharmony_ci } 2942bf215546Sopenharmony_ci 2943bf215546Sopenharmony_ci si_update_fb_dirtiness_after_rendering(sctx); 2944bf215546Sopenharmony_ci 2945bf215546Sopenharmony_ci /* Disable DCC if the formats are incompatible. */ 2946bf215546Sopenharmony_ci for (i = 0; i < state->nr_cbufs; i++) { 2947bf215546Sopenharmony_ci if (!state->cbufs[i]) 2948bf215546Sopenharmony_ci continue; 2949bf215546Sopenharmony_ci 2950bf215546Sopenharmony_ci surf = (struct si_surface *)state->cbufs[i]; 2951bf215546Sopenharmony_ci tex = (struct si_texture *)surf->base.texture; 2952bf215546Sopenharmony_ci 2953bf215546Sopenharmony_ci if (!surf->dcc_incompatible) 2954bf215546Sopenharmony_ci continue; 2955bf215546Sopenharmony_ci 2956bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, surf->base.u.tex.level)) 2957bf215546Sopenharmony_ci if (!si_texture_disable_dcc(sctx, tex)) 2958bf215546Sopenharmony_ci si_decompress_dcc(sctx, tex); 2959bf215546Sopenharmony_ci 2960bf215546Sopenharmony_ci surf->dcc_incompatible = false; 2961bf215546Sopenharmony_ci } 2962bf215546Sopenharmony_ci 2963bf215546Sopenharmony_ci /* Only flush TC when changing the framebuffer state, because 2964bf215546Sopenharmony_ci * the only client not using TC that can change textures is 2965bf215546Sopenharmony_ci * the framebuffer. 
2966bf215546Sopenharmony_ci * 2967bf215546Sopenharmony_ci * Wait for compute shaders because of possible transitions: 2968bf215546Sopenharmony_ci * - FB write -> shader read 2969bf215546Sopenharmony_ci * - shader write -> FB read 2970bf215546Sopenharmony_ci * 2971bf215546Sopenharmony_ci * Wait for draws because of possible transitions: 2972bf215546Sopenharmony_ci * - texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*) 2973bf215546Sopenharmony_ci * 2974bf215546Sopenharmony_ci * DB caches are flushed on demand (using si_decompress_textures). 2975bf215546Sopenharmony_ci * 2976bf215546Sopenharmony_ci * When MSAA is enabled, CB and TC caches are flushed on demand 2977bf215546Sopenharmony_ci * (after FMASK decompression). Shader write -> FB read transitions 2978bf215546Sopenharmony_ci * cannot happen for MSAA textures, because MSAA shader images are 2979bf215546Sopenharmony_ci * not supported. 2980bf215546Sopenharmony_ci * 2981bf215546Sopenharmony_ci * Only flush and wait for CB if there is actually a bound color buffer. 2982bf215546Sopenharmony_ci */ 2983bf215546Sopenharmony_ci if (sctx->framebuffer.uncompressed_cb_mask) { 2984bf215546Sopenharmony_ci si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 2985bf215546Sopenharmony_ci sctx->framebuffer.CB_has_shader_readable_metadata, 2986bf215546Sopenharmony_ci sctx->framebuffer.all_DCC_pipe_aligned); 2987bf215546Sopenharmony_ci } 2988bf215546Sopenharmony_ci 2989bf215546Sopenharmony_ci sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH; 2990bf215546Sopenharmony_ci 2991bf215546Sopenharmony_ci /* u_blitter doesn't invoke depth decompression when it does multiple 2992bf215546Sopenharmony_ci * blits in a row, but the only case when it matters for DB is when 2993bf215546Sopenharmony_ci * doing generate_mipmap. So here we flush DB manually between 2994bf215546Sopenharmony_ci * individual generate_mipmap blits. 
2995bf215546Sopenharmony_ci * Note that lower mipmap levels aren't compressed. 2996bf215546Sopenharmony_ci */ 2997bf215546Sopenharmony_ci if (sctx->generate_mipmap_for_depth) { 2998bf215546Sopenharmony_ci si_make_DB_shader_coherent(sctx, 1, false, sctx->framebuffer.DB_has_shader_readable_metadata); 2999bf215546Sopenharmony_ci } else if (sctx->gfx_level == GFX9) { 3000bf215546Sopenharmony_ci /* It appears that DB metadata "leaks" in a sequence of: 3001bf215546Sopenharmony_ci * - depth clear 3002bf215546Sopenharmony_ci * - DCC decompress for shader image writes (with DB disabled) 3003bf215546Sopenharmony_ci * - render with DEPTH_BEFORE_SHADER=1 3004bf215546Sopenharmony_ci * Flushing DB metadata works around the problem. 3005bf215546Sopenharmony_ci */ 3006bf215546Sopenharmony_ci sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; 3007bf215546Sopenharmony_ci } 3008bf215546Sopenharmony_ci 3009bf215546Sopenharmony_ci /* Take the maximum of the old and new count. If the new count is lower, 3010bf215546Sopenharmony_ci * dirtying is needed to disable the unbound colorbuffers. 
3011bf215546Sopenharmony_ci */ 3012bf215546Sopenharmony_ci sctx->framebuffer.dirty_cbufs |= 3013bf215546Sopenharmony_ci (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 3014bf215546Sopenharmony_ci sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 3015bf215546Sopenharmony_ci 3016bf215546Sopenharmony_ci si_dec_framebuffer_counters(&sctx->framebuffer.state); 3017bf215546Sopenharmony_ci util_copy_framebuffer_state(&sctx->framebuffer.state, state); 3018bf215546Sopenharmony_ci 3019bf215546Sopenharmony_ci sctx->framebuffer.colorbuf_enabled_4bit = 0; 3020bf215546Sopenharmony_ci sctx->framebuffer.spi_shader_col_format = 0; 3021bf215546Sopenharmony_ci sctx->framebuffer.spi_shader_col_format_alpha = 0; 3022bf215546Sopenharmony_ci sctx->framebuffer.spi_shader_col_format_blend = 0; 3023bf215546Sopenharmony_ci sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 3024bf215546Sopenharmony_ci sctx->framebuffer.color_is_int8 = 0; 3025bf215546Sopenharmony_ci sctx->framebuffer.color_is_int10 = 0; 3026bf215546Sopenharmony_ci 3027bf215546Sopenharmony_ci sctx->framebuffer.compressed_cb_mask = 0; 3028bf215546Sopenharmony_ci sctx->framebuffer.uncompressed_cb_mask = 0; 3029bf215546Sopenharmony_ci sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 3030bf215546Sopenharmony_ci sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples; 3031bf215546Sopenharmony_ci sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 3032bf215546Sopenharmony_ci sctx->framebuffer.any_dst_linear = false; 3033bf215546Sopenharmony_ci sctx->framebuffer.CB_has_shader_readable_metadata = false; 3034bf215546Sopenharmony_ci sctx->framebuffer.DB_has_shader_readable_metadata = false; 3035bf215546Sopenharmony_ci sctx->framebuffer.all_DCC_pipe_aligned = true; 3036bf215546Sopenharmony_ci sctx->framebuffer.has_dcc_msaa = false; 3037bf215546Sopenharmony_ci sctx->framebuffer.min_bytes_per_pixel = 0; 
3038bf215546Sopenharmony_ci 3039bf215546Sopenharmony_ci for (i = 0; i < state->nr_cbufs; i++) { 3040bf215546Sopenharmony_ci if (!state->cbufs[i]) 3041bf215546Sopenharmony_ci continue; 3042bf215546Sopenharmony_ci 3043bf215546Sopenharmony_ci surf = (struct si_surface *)state->cbufs[i]; 3044bf215546Sopenharmony_ci tex = (struct si_texture *)surf->base.texture; 3045bf215546Sopenharmony_ci 3046bf215546Sopenharmony_ci if (!surf->color_initialized) { 3047bf215546Sopenharmony_ci si_initialize_color_surface(sctx, surf); 3048bf215546Sopenharmony_ci } 3049bf215546Sopenharmony_ci 3050bf215546Sopenharmony_ci sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 3051bf215546Sopenharmony_ci sctx->framebuffer.spi_shader_col_format |= surf->spi_shader_col_format << (i * 4); 3052bf215546Sopenharmony_ci sctx->framebuffer.spi_shader_col_format_alpha |= surf->spi_shader_col_format_alpha << (i * 4); 3053bf215546Sopenharmony_ci sctx->framebuffer.spi_shader_col_format_blend |= surf->spi_shader_col_format_blend << (i * 4); 3054bf215546Sopenharmony_ci sctx->framebuffer.spi_shader_col_format_blend_alpha |= surf->spi_shader_col_format_blend_alpha 3055bf215546Sopenharmony_ci << (i * 4); 3056bf215546Sopenharmony_ci 3057bf215546Sopenharmony_ci if (surf->color_is_int8) 3058bf215546Sopenharmony_ci sctx->framebuffer.color_is_int8 |= 1 << i; 3059bf215546Sopenharmony_ci if (surf->color_is_int10) 3060bf215546Sopenharmony_ci sctx->framebuffer.color_is_int10 |= 1 << i; 3061bf215546Sopenharmony_ci 3062bf215546Sopenharmony_ci if (tex->surface.fmask_offset) 3063bf215546Sopenharmony_ci sctx->framebuffer.compressed_cb_mask |= 1 << i; 3064bf215546Sopenharmony_ci else 3065bf215546Sopenharmony_ci sctx->framebuffer.uncompressed_cb_mask |= 1 << i; 3066bf215546Sopenharmony_ci 3067bf215546Sopenharmony_ci /* Don't update nr_color_samples for non-AA buffers. 3068bf215546Sopenharmony_ci * (e.g. 
destination of MSAA resolve) 3069bf215546Sopenharmony_ci */ 3070bf215546Sopenharmony_ci if (tex->buffer.b.b.nr_samples >= 2 && 3071bf215546Sopenharmony_ci tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) { 3072bf215546Sopenharmony_ci sctx->framebuffer.nr_color_samples = 3073bf215546Sopenharmony_ci MIN2(sctx->framebuffer.nr_color_samples, tex->buffer.b.b.nr_storage_samples); 3074bf215546Sopenharmony_ci sctx->framebuffer.nr_color_samples = MAX2(1, sctx->framebuffer.nr_color_samples); 3075bf215546Sopenharmony_ci } 3076bf215546Sopenharmony_ci 3077bf215546Sopenharmony_ci if (tex->surface.is_linear) 3078bf215546Sopenharmony_ci sctx->framebuffer.any_dst_linear = true; 3079bf215546Sopenharmony_ci 3080bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, surf->base.u.tex.level)) { 3081bf215546Sopenharmony_ci sctx->framebuffer.CB_has_shader_readable_metadata = true; 3082bf215546Sopenharmony_ci 3083bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX9 && !tex->surface.u.gfx9.color.dcc.pipe_aligned) 3084bf215546Sopenharmony_ci sctx->framebuffer.all_DCC_pipe_aligned = false; 3085bf215546Sopenharmony_ci 3086bf215546Sopenharmony_ci if (tex->buffer.b.b.nr_storage_samples >= 2) 3087bf215546Sopenharmony_ci sctx->framebuffer.has_dcc_msaa = true; 3088bf215546Sopenharmony_ci } 3089bf215546Sopenharmony_ci 3090bf215546Sopenharmony_ci si_context_add_resource_size(sctx, surf->base.texture); 3091bf215546Sopenharmony_ci 3092bf215546Sopenharmony_ci p_atomic_inc(&tex->framebuffers_bound); 3093bf215546Sopenharmony_ci 3094bf215546Sopenharmony_ci /* Update the minimum but don't keep 0. 
*/ 3095bf215546Sopenharmony_ci if (!sctx->framebuffer.min_bytes_per_pixel || 3096bf215546Sopenharmony_ci tex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) 3097bf215546Sopenharmony_ci sctx->framebuffer.min_bytes_per_pixel = tex->surface.bpe; 3098bf215546Sopenharmony_ci } 3099bf215546Sopenharmony_ci 3100bf215546Sopenharmony_ci struct si_texture *zstex = NULL; 3101bf215546Sopenharmony_ci 3102bf215546Sopenharmony_ci if (state->zsbuf) { 3103bf215546Sopenharmony_ci surf = (struct si_surface *)state->zsbuf; 3104bf215546Sopenharmony_ci zstex = (struct si_texture *)surf->base.texture; 3105bf215546Sopenharmony_ci 3106bf215546Sopenharmony_ci if (!surf->depth_initialized) { 3107bf215546Sopenharmony_ci si_init_depth_surface(sctx, surf); 3108bf215546Sopenharmony_ci } 3109bf215546Sopenharmony_ci 3110bf215546Sopenharmony_ci if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level, PIPE_MASK_ZS)) 3111bf215546Sopenharmony_ci sctx->framebuffer.DB_has_shader_readable_metadata = true; 3112bf215546Sopenharmony_ci 3113bf215546Sopenharmony_ci si_context_add_resource_size(sctx, surf->base.texture); 3114bf215546Sopenharmony_ci 3115bf215546Sopenharmony_ci /* Update the minimum but don't keep 0. */ 3116bf215546Sopenharmony_ci if (!sctx->framebuffer.min_bytes_per_pixel || 3117bf215546Sopenharmony_ci zstex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) 3118bf215546Sopenharmony_ci sctx->framebuffer.min_bytes_per_pixel = zstex->surface.bpe; 3119bf215546Sopenharmony_ci } 3120bf215546Sopenharmony_ci 3121bf215546Sopenharmony_ci si_update_ps_colorbuf0_slot(sctx); 3122bf215546Sopenharmony_ci si_update_poly_offset_state(sctx); 3123bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 3124bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 3125bf215546Sopenharmony_ci 3126bf215546Sopenharmony_ci /* NGG cull state uses the sample count. 
*/ 3127bf215546Sopenharmony_ci if (sctx->screen->use_ngg_culling) 3128bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 3129bf215546Sopenharmony_ci 3130bf215546Sopenharmony_ci if (sctx->screen->dpbb_allowed) 3131bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 3132bf215546Sopenharmony_ci 3133bf215546Sopenharmony_ci if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 3134bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3135bf215546Sopenharmony_ci 3136bf215546Sopenharmony_ci if (sctx->screen->has_out_of_order_rast && 3137bf215546Sopenharmony_ci (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit || 3138bf215546Sopenharmony_ci !!sctx->framebuffer.state.zsbuf != old_has_zsbuf || 3139bf215546Sopenharmony_ci (zstex && zstex->surface.has_stencil != old_has_stencil))) 3140bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3141bf215546Sopenharmony_ci 3142bf215546Sopenharmony_ci if (sctx->framebuffer.nr_samples != old_nr_samples) { 3143bf215546Sopenharmony_ci struct pipe_constant_buffer constbuf = {0}; 3144bf215546Sopenharmony_ci 3145bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3146bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 3147bf215546Sopenharmony_ci 3148bf215546Sopenharmony_ci if (!sctx->sample_pos_buffer) { 3149bf215546Sopenharmony_ci sctx->sample_pos_buffer = pipe_buffer_create_with_data(&sctx->b, 0, PIPE_USAGE_DEFAULT, 3150bf215546Sopenharmony_ci sizeof(sctx->sample_positions), 3151bf215546Sopenharmony_ci &sctx->sample_positions); 3152bf215546Sopenharmony_ci } 3153bf215546Sopenharmony_ci constbuf.buffer = sctx->sample_pos_buffer; 3154bf215546Sopenharmony_ci 3155bf215546Sopenharmony_ci /* Set sample locations as fragment shader constants. 
*/ 3156bf215546Sopenharmony_ci switch (sctx->framebuffer.nr_samples) { 3157bf215546Sopenharmony_ci case 1: 3158bf215546Sopenharmony_ci constbuf.buffer_offset = 0; 3159bf215546Sopenharmony_ci break; 3160bf215546Sopenharmony_ci case 2: 3161bf215546Sopenharmony_ci constbuf.buffer_offset = 3162bf215546Sopenharmony_ci (ubyte *)sctx->sample_positions.x2 - (ubyte *)sctx->sample_positions.x1; 3163bf215546Sopenharmony_ci break; 3164bf215546Sopenharmony_ci case 4: 3165bf215546Sopenharmony_ci constbuf.buffer_offset = 3166bf215546Sopenharmony_ci (ubyte *)sctx->sample_positions.x4 - (ubyte *)sctx->sample_positions.x1; 3167bf215546Sopenharmony_ci break; 3168bf215546Sopenharmony_ci case 8: 3169bf215546Sopenharmony_ci constbuf.buffer_offset = 3170bf215546Sopenharmony_ci (ubyte *)sctx->sample_positions.x8 - (ubyte *)sctx->sample_positions.x1; 3171bf215546Sopenharmony_ci break; 3172bf215546Sopenharmony_ci case 16: 3173bf215546Sopenharmony_ci constbuf.buffer_offset = 3174bf215546Sopenharmony_ci (ubyte *)sctx->sample_positions.x16 - (ubyte *)sctx->sample_positions.x1; 3175bf215546Sopenharmony_ci break; 3176bf215546Sopenharmony_ci default: 3177bf215546Sopenharmony_ci PRINT_ERR("Requested an invalid number of samples %i.\n", sctx->framebuffer.nr_samples); 3178bf215546Sopenharmony_ci assert(0); 3179bf215546Sopenharmony_ci } 3180bf215546Sopenharmony_ci constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 3181bf215546Sopenharmony_ci si_set_internal_const_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 3182bf215546Sopenharmony_ci 3183bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 3184bf215546Sopenharmony_ci } 3185bf215546Sopenharmony_ci 3186bf215546Sopenharmony_ci si_ps_key_update_framebuffer(sctx); 3187bf215546Sopenharmony_ci si_ps_key_update_framebuffer_blend(sctx); 3188bf215546Sopenharmony_ci si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 3189bf215546Sopenharmony_ci si_update_ps_inputs_read_or_disabled(sctx); 
3190bf215546Sopenharmony_ci sctx->do_update_shaders = true; 3191bf215546Sopenharmony_ci 3192bf215546Sopenharmony_ci if (!sctx->decompression_enabled) { 3193bf215546Sopenharmony_ci /* Prevent textures decompression when the framebuffer state 3194bf215546Sopenharmony_ci * changes come from the decompression passes themselves. 3195bf215546Sopenharmony_ci */ 3196bf215546Sopenharmony_ci sctx->need_check_render_feedback = true; 3197bf215546Sopenharmony_ci } 3198bf215546Sopenharmony_ci} 3199bf215546Sopenharmony_ci 3200bf215546Sopenharmony_cistatic void si_emit_framebuffer_state(struct si_context *sctx) 3201bf215546Sopenharmony_ci{ 3202bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 3203bf215546Sopenharmony_ci struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 3204bf215546Sopenharmony_ci unsigned i, nr_cbufs = state->nr_cbufs; 3205bf215546Sopenharmony_ci struct si_texture *tex = NULL; 3206bf215546Sopenharmony_ci struct si_surface *cb = NULL; 3207bf215546Sopenharmony_ci unsigned cb_color_info = 0; 3208bf215546Sopenharmony_ci 3209bf215546Sopenharmony_ci radeon_begin(cs); 3210bf215546Sopenharmony_ci 3211bf215546Sopenharmony_ci /* Colorbuffers. */ 3212bf215546Sopenharmony_ci for (i = 0; i < nr_cbufs; i++) { 3213bf215546Sopenharmony_ci uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base; 3214bf215546Sopenharmony_ci unsigned cb_color_attrib; 3215bf215546Sopenharmony_ci 3216bf215546Sopenharmony_ci if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 3217bf215546Sopenharmony_ci continue; 3218bf215546Sopenharmony_ci 3219bf215546Sopenharmony_ci cb = (struct si_surface *)state->cbufs[i]; 3220bf215546Sopenharmony_ci if (!cb) { 3221bf215546Sopenharmony_ci radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 3222bf215546Sopenharmony_ci sctx->gfx_level >= GFX11 ? 
3223bf215546Sopenharmony_ci S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) : 3224bf215546Sopenharmony_ci S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID)); 3225bf215546Sopenharmony_ci continue; 3226bf215546Sopenharmony_ci } 3227bf215546Sopenharmony_ci 3228bf215546Sopenharmony_ci tex = (struct si_texture *)cb->base.texture; 3229bf215546Sopenharmony_ci radeon_add_to_buffer_list( 3230bf215546Sopenharmony_ci sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC | 3231bf215546Sopenharmony_ci (tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER)); 3232bf215546Sopenharmony_ci 3233bf215546Sopenharmony_ci if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) { 3234bf215546Sopenharmony_ci radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer, 3235bf215546Sopenharmony_ci RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC | 3236bf215546Sopenharmony_ci RADEON_PRIO_SEPARATE_META); 3237bf215546Sopenharmony_ci } 3238bf215546Sopenharmony_ci 3239bf215546Sopenharmony_ci /* Compute mutable surface parameters. */ 3240bf215546Sopenharmony_ci cb_color_base = tex->buffer.gpu_address >> 8; 3241bf215546Sopenharmony_ci cb_color_fmask = 0; 3242bf215546Sopenharmony_ci cb_color_cmask = tex->cmask_base_address_reg; 3243bf215546Sopenharmony_ci cb_dcc_base = 0; 3244bf215546Sopenharmony_ci cb_color_info = cb->cb_color_info | tex->cb_color_info; 3245bf215546Sopenharmony_ci cb_color_attrib = cb->cb_color_attrib; 3246bf215546Sopenharmony_ci 3247bf215546Sopenharmony_ci if (tex->swap_rgb_to_bgr) { 3248bf215546Sopenharmony_ci /* Swap R and B channels. 
*/ 3249bf215546Sopenharmony_ci static unsigned rgb_to_bgr[4] = { 3250bf215546Sopenharmony_ci [V_028C70_SWAP_STD] = V_028C70_SWAP_ALT, 3251bf215546Sopenharmony_ci [V_028C70_SWAP_ALT] = V_028C70_SWAP_STD, 3252bf215546Sopenharmony_ci [V_028C70_SWAP_STD_REV] = V_028C70_SWAP_ALT_REV, 3253bf215546Sopenharmony_ci [V_028C70_SWAP_ALT_REV] = V_028C70_SWAP_STD_REV, 3254bf215546Sopenharmony_ci }; 3255bf215546Sopenharmony_ci unsigned swap = rgb_to_bgr[G_028C70_COMP_SWAP(cb_color_info)]; 3256bf215546Sopenharmony_ci 3257bf215546Sopenharmony_ci cb_color_info &= C_028C70_COMP_SWAP; 3258bf215546Sopenharmony_ci cb_color_info |= S_028C70_COMP_SWAP(swap); 3259bf215546Sopenharmony_ci } 3260bf215546Sopenharmony_ci 3261bf215546Sopenharmony_ci if (sctx->gfx_level < GFX11 && cb->base.u.tex.level > 0) 3262bf215546Sopenharmony_ci cb_color_info &= C_028C70_FAST_CLEAR; 3263bf215546Sopenharmony_ci 3264bf215546Sopenharmony_ci if (tex->surface.fmask_offset) { 3265bf215546Sopenharmony_ci cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8; 3266bf215546Sopenharmony_ci cb_color_fmask |= tex->surface.fmask_tile_swizzle; 3267bf215546Sopenharmony_ci } 3268bf215546Sopenharmony_ci 3269bf215546Sopenharmony_ci /* Set up DCC. */ 3270bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, cb->base.u.tex.level)) { 3271bf215546Sopenharmony_ci bool is_msaa_resolve_dst = state->cbufs[0] && state->cbufs[0]->texture->nr_samples > 1 && 3272bf215546Sopenharmony_ci state->cbufs[1] == &cb->base && 3273bf215546Sopenharmony_ci state->cbufs[1]->texture->nr_samples <= 1; 3274bf215546Sopenharmony_ci 3275bf215546Sopenharmony_ci /* CB can't do MSAA resolve on gfx11. 
*/ 3276bf215546Sopenharmony_ci assert(!is_msaa_resolve_dst || sctx->gfx_level < GFX11); 3277bf215546Sopenharmony_ci 3278bf215546Sopenharmony_ci if (!is_msaa_resolve_dst && sctx->gfx_level < GFX11) 3279bf215546Sopenharmony_ci cb_color_info |= S_028C70_DCC_ENABLE(1); 3280bf215546Sopenharmony_ci 3281bf215546Sopenharmony_ci cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 3282bf215546Sopenharmony_ci 3283bf215546Sopenharmony_ci unsigned dcc_tile_swizzle = tex->surface.tile_swizzle; 3284bf215546Sopenharmony_ci dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8; 3285bf215546Sopenharmony_ci cb_dcc_base |= dcc_tile_swizzle; 3286bf215546Sopenharmony_ci } 3287bf215546Sopenharmony_ci 3288bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 3289bf215546Sopenharmony_ci unsigned cb_color_attrib3, cb_fdcc_control; 3290bf215546Sopenharmony_ci 3291bf215546Sopenharmony_ci /* Set mutable surface parameters. */ 3292bf215546Sopenharmony_ci cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3293bf215546Sopenharmony_ci cb_color_base |= tex->surface.tile_swizzle; 3294bf215546Sopenharmony_ci 3295bf215546Sopenharmony_ci cb_color_attrib3 = cb->cb_color_attrib3 | 3296bf215546Sopenharmony_ci S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 3297bf215546Sopenharmony_ci S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned); 3298bf215546Sopenharmony_ci cb_fdcc_control = cb->cb_dcc_control | 3299bf215546Sopenharmony_ci S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) | 3300bf215546Sopenharmony_ci S_028C78_FDCC_ENABLE(vi_dcc_enabled(tex, cb->base.u.tex.level)); 3301bf215546Sopenharmony_ci 3302bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028C6C_CB_COLOR0_VIEW + i * 0x3C, 4); 3303bf215546Sopenharmony_ci radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3304bf215546Sopenharmony_ci radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3305bf215546Sopenharmony_ci radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 
3306bf215546Sopenharmony_ci radeon_emit(cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */ 3307bf215546Sopenharmony_ci 3308bf215546Sopenharmony_ci radeon_set_context_reg(R_028C60_CB_COLOR0_BASE + i * 0x3C, cb_color_base); 3309bf215546Sopenharmony_ci radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32); 3310bf215546Sopenharmony_ci radeon_set_context_reg(R_028C94_CB_COLOR0_DCC_BASE + i * 0x3C, cb_dcc_base); 3311bf215546Sopenharmony_ci radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32); 3312bf215546Sopenharmony_ci radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2); 3313bf215546Sopenharmony_ci radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3); 3314bf215546Sopenharmony_ci } else if (sctx->gfx_level >= GFX10) { 3315bf215546Sopenharmony_ci unsigned cb_color_attrib3; 3316bf215546Sopenharmony_ci 3317bf215546Sopenharmony_ci /* Set mutable surface parameters. */ 3318bf215546Sopenharmony_ci cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3319bf215546Sopenharmony_ci cb_color_base |= tex->surface.tile_swizzle; 3320bf215546Sopenharmony_ci if (!tex->surface.fmask_offset) 3321bf215546Sopenharmony_ci cb_color_fmask = cb_color_base; 3322bf215546Sopenharmony_ci if (cb->base.u.tex.level > 0) 3323bf215546Sopenharmony_ci cb_color_cmask = cb_color_base; 3324bf215546Sopenharmony_ci 3325bf215546Sopenharmony_ci cb_color_attrib3 = cb->cb_color_attrib3 | 3326bf215546Sopenharmony_ci S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 3327bf215546Sopenharmony_ci S_028EE0_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 3328bf215546Sopenharmony_ci S_028EE0_CMASK_PIPE_ALIGNED(1) | 3329bf215546Sopenharmony_ci S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned); 3330bf215546Sopenharmony_ci 3331bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 14); 3332bf215546Sopenharmony_ci radeon_emit(cb_color_base); /* 
CB_COLOR0_BASE */ 3333bf215546Sopenharmony_ci radeon_emit(0); /* hole */ 3334bf215546Sopenharmony_ci radeon_emit(0); /* hole */ 3335bf215546Sopenharmony_ci radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3336bf215546Sopenharmony_ci radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3337bf215546Sopenharmony_ci radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3338bf215546Sopenharmony_ci radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3339bf215546Sopenharmony_ci radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 3340bf215546Sopenharmony_ci radeon_emit(0); /* hole */ 3341bf215546Sopenharmony_ci radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 3342bf215546Sopenharmony_ci radeon_emit(0); /* hole */ 3343bf215546Sopenharmony_ci radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3344bf215546Sopenharmony_ci radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3345bf215546Sopenharmony_ci radeon_emit(cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 3346bf215546Sopenharmony_ci 3347bf215546Sopenharmony_ci radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32); 3348bf215546Sopenharmony_ci radeon_set_context_reg(R_028E60_CB_COLOR0_CMASK_BASE_EXT + i * 4, 3349bf215546Sopenharmony_ci cb_color_cmask >> 32); 3350bf215546Sopenharmony_ci radeon_set_context_reg(R_028E80_CB_COLOR0_FMASK_BASE_EXT + i * 4, 3351bf215546Sopenharmony_ci cb_color_fmask >> 32); 3352bf215546Sopenharmony_ci radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32); 3353bf215546Sopenharmony_ci radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2); 3354bf215546Sopenharmony_ci radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3); 3355bf215546Sopenharmony_ci } else if (sctx->gfx_level == GFX9) { 3356bf215546Sopenharmony_ci struct gfx9_surf_meta_flags meta = { 3357bf215546Sopenharmony_ci .rb_aligned = 1, 3358bf215546Sopenharmony_ci .pipe_aligned = 1, 
3359bf215546Sopenharmony_ci }; 3360bf215546Sopenharmony_ci 3361bf215546Sopenharmony_ci if (!tex->is_depth && tex->surface.meta_offset) 3362bf215546Sopenharmony_ci meta = tex->surface.u.gfx9.color.dcc; 3363bf215546Sopenharmony_ci 3364bf215546Sopenharmony_ci /* Set mutable surface parameters. */ 3365bf215546Sopenharmony_ci cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3366bf215546Sopenharmony_ci cb_color_base |= tex->surface.tile_swizzle; 3367bf215546Sopenharmony_ci if (!tex->surface.fmask_offset) 3368bf215546Sopenharmony_ci cb_color_fmask = cb_color_base; 3369bf215546Sopenharmony_ci if (cb->base.u.tex.level > 0) 3370bf215546Sopenharmony_ci cb_color_cmask = cb_color_base; 3371bf215546Sopenharmony_ci cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 3372bf215546Sopenharmony_ci S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 3373bf215546Sopenharmony_ci S_028C74_RB_ALIGNED(meta.rb_aligned) | 3374bf215546Sopenharmony_ci S_028C74_PIPE_ALIGNED(meta.pipe_aligned); 3375bf215546Sopenharmony_ci 3376bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 15); 3377bf215546Sopenharmony_ci radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 3378bf215546Sopenharmony_ci radeon_emit(S_028C64_BASE_256B(cb_color_base >> 32)); /* CB_COLOR0_BASE_EXT */ 3379bf215546Sopenharmony_ci radeon_emit(cb->cb_color_attrib2); /* CB_COLOR0_ATTRIB2 */ 3380bf215546Sopenharmony_ci radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3381bf215546Sopenharmony_ci radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3382bf215546Sopenharmony_ci radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3383bf215546Sopenharmony_ci radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3384bf215546Sopenharmony_ci radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 3385bf215546Sopenharmony_ci radeon_emit(S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */ 3386bf215546Sopenharmony_ci 
radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 3387bf215546Sopenharmony_ci radeon_emit(S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */ 3388bf215546Sopenharmony_ci radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3389bf215546Sopenharmony_ci radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3390bf215546Sopenharmony_ci radeon_emit(cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 3391bf215546Sopenharmony_ci radeon_emit(S_028C98_BASE_256B(cb_dcc_base >> 32)); /* CB_COLOR0_DCC_BASE_EXT */ 3392bf215546Sopenharmony_ci 3393bf215546Sopenharmony_ci radeon_set_context_reg(R_0287A0_CB_MRT0_EPITCH + i * 4, 3394bf215546Sopenharmony_ci S_0287A0_EPITCH(tex->surface.u.gfx9.epitch)); 3395bf215546Sopenharmony_ci } else { 3396bf215546Sopenharmony_ci /* Compute mutable surface parameters (GFX6-GFX8). */ 3397bf215546Sopenharmony_ci const struct legacy_surf_level *level_info = 3398bf215546Sopenharmony_ci &tex->surface.u.legacy.level[cb->base.u.tex.level]; 3399bf215546Sopenharmony_ci unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 3400bf215546Sopenharmony_ci unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 3401bf215546Sopenharmony_ci 3402bf215546Sopenharmony_ci cb_color_base += level_info->offset_256B; 3403bf215546Sopenharmony_ci /* Only macrotiled modes can set tile swizzle. 
*/ 3404bf215546Sopenharmony_ci if (level_info->mode == RADEON_SURF_MODE_2D) 3405bf215546Sopenharmony_ci cb_color_base |= tex->surface.tile_swizzle; 3406bf215546Sopenharmony_ci 3407bf215546Sopenharmony_ci if (!tex->surface.fmask_offset) 3408bf215546Sopenharmony_ci cb_color_fmask = cb_color_base; 3409bf215546Sopenharmony_ci if (cb->base.u.tex.level > 0) 3410bf215546Sopenharmony_ci cb_color_cmask = cb_color_base; 3411bf215546Sopenharmony_ci if (cb_dcc_base) 3412bf215546Sopenharmony_ci cb_dcc_base += tex->surface.u.legacy.color.dcc_level[cb->base.u.tex.level].dcc_offset >> 8; 3413bf215546Sopenharmony_ci 3414bf215546Sopenharmony_ci pitch_tile_max = level_info->nblk_x / 8 - 1; 3415bf215546Sopenharmony_ci slice_tile_max = level_info->nblk_x * level_info->nblk_y / 64 - 1; 3416bf215546Sopenharmony_ci tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 3417bf215546Sopenharmony_ci 3418bf215546Sopenharmony_ci cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); 3419bf215546Sopenharmony_ci cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 3420bf215546Sopenharmony_ci cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 3421bf215546Sopenharmony_ci 3422bf215546Sopenharmony_ci if (tex->surface.fmask_offset) { 3423bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX7) 3424bf215546Sopenharmony_ci cb_color_pitch |= 3425bf215546Sopenharmony_ci S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.color.fmask.pitch_in_pixels / 8 - 1); 3426bf215546Sopenharmony_ci cb_color_attrib |= 3427bf215546Sopenharmony_ci S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.color.fmask.tiling_index); 3428bf215546Sopenharmony_ci cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.color.fmask.slice_tile_max); 3429bf215546Sopenharmony_ci } else { 3430bf215546Sopenharmony_ci /* This must be set for fast clear to work without FMASK. 
*/ 3431bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX7) 3432bf215546Sopenharmony_ci cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 3433bf215546Sopenharmony_ci cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 3434bf215546Sopenharmony_ci cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 3435bf215546Sopenharmony_ci } 3436bf215546Sopenharmony_ci 3437bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 3438bf215546Sopenharmony_ci sctx->gfx_level >= GFX8 ? 14 : 13); 3439bf215546Sopenharmony_ci radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 3440bf215546Sopenharmony_ci radeon_emit(cb_color_pitch); /* CB_COLOR0_PITCH */ 3441bf215546Sopenharmony_ci radeon_emit(cb_color_slice); /* CB_COLOR0_SLICE */ 3442bf215546Sopenharmony_ci radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3443bf215546Sopenharmony_ci radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3444bf215546Sopenharmony_ci radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3445bf215546Sopenharmony_ci radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3446bf215546Sopenharmony_ci radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 3447bf215546Sopenharmony_ci radeon_emit(tex->surface.u.legacy.color.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */ 3448bf215546Sopenharmony_ci radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 3449bf215546Sopenharmony_ci radeon_emit(cb_color_fmask_slice); /* CB_COLOR0_FMASK_SLICE */ 3450bf215546Sopenharmony_ci radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3451bf215546Sopenharmony_ci radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3452bf215546Sopenharmony_ci 3453bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */ 3454bf215546Sopenharmony_ci radeon_emit(cb_dcc_base); 3455bf215546Sopenharmony_ci } 3456bf215546Sopenharmony_ci } 3457bf215546Sopenharmony_ci for (; i < 8; i++) 3458bf215546Sopenharmony_ci if 
(sctx->framebuffer.dirty_cbufs & (1 << i)) 3459bf215546Sopenharmony_ci radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 3460bf215546Sopenharmony_ci 3461bf215546Sopenharmony_ci /* ZS buffer. */ 3462bf215546Sopenharmony_ci if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 3463bf215546Sopenharmony_ci struct si_surface *zb = (struct si_surface *)state->zsbuf; 3464bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)zb->base.texture; 3465bf215546Sopenharmony_ci unsigned db_z_info = zb->db_z_info; 3466bf215546Sopenharmony_ci unsigned db_stencil_info = zb->db_stencil_info; 3467bf215546Sopenharmony_ci unsigned db_htile_surface = zb->db_htile_surface; 3468bf215546Sopenharmony_ci 3469bf215546Sopenharmony_ci radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | 3470bf215546Sopenharmony_ci (zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA 3471bf215546Sopenharmony_ci : RADEON_PRIO_DEPTH_BUFFER)); 3472bf215546Sopenharmony_ci bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS); 3473bf215546Sopenharmony_ci 3474bf215546Sopenharmony_ci /* Set fields dependent on tc_compatile_htile. 
*/ 3475bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX9 && tc_compat_htile) { 3476bf215546Sopenharmony_ci unsigned max_zplanes = 4; 3477bf215546Sopenharmony_ci 3478bf215546Sopenharmony_ci if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1) 3479bf215546Sopenharmony_ci max_zplanes = 2; 3480bf215546Sopenharmony_ci 3481bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 3482bf215546Sopenharmony_ci bool iterate256 = tex->buffer.b.b.nr_samples >= 2; 3483bf215546Sopenharmony_ci db_z_info |= S_028040_ITERATE_FLUSH(1) | 3484bf215546Sopenharmony_ci S_028040_ITERATE_256(iterate256); 3485bf215546Sopenharmony_ci db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled) | 3486bf215546Sopenharmony_ci S_028044_ITERATE_256(iterate256); 3487bf215546Sopenharmony_ci 3488bf215546Sopenharmony_ci /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */ 3489bf215546Sopenharmony_ci if (sctx->screen->info.has_two_planes_iterate256_bug && iterate256 && 3490bf215546Sopenharmony_ci !tex->htile_stencil_disabled && tex->buffer.b.b.nr_samples == 4) { 3491bf215546Sopenharmony_ci max_zplanes = 1; 3492bf215546Sopenharmony_ci } 3493bf215546Sopenharmony_ci } else { 3494bf215546Sopenharmony_ci db_z_info |= S_028038_ITERATE_FLUSH(1); 3495bf215546Sopenharmony_ci db_stencil_info |= S_02803C_ITERATE_FLUSH(1); 3496bf215546Sopenharmony_ci } 3497bf215546Sopenharmony_ci 3498bf215546Sopenharmony_ci db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1); 3499bf215546Sopenharmony_ci } 3500bf215546Sopenharmony_ci 3501bf215546Sopenharmony_ci unsigned level = zb->base.u.tex.level; 3502bf215546Sopenharmony_ci 3503bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 3504bf215546Sopenharmony_ci radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 3505bf215546Sopenharmony_ci radeon_set_context_reg(R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size); 3506bf215546Sopenharmony_ci 
3507bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 3508bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028040_DB_Z_INFO, 6); 3509bf215546Sopenharmony_ci } else { 3510bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 7); 3511bf215546Sopenharmony_ci radeon_emit(S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */ 3512bf215546Sopenharmony_ci } 3513bf215546Sopenharmony_ci radeon_emit(db_z_info | /* DB_Z_INFO */ 3514bf215546Sopenharmony_ci S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 3515bf215546Sopenharmony_ci radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 3516bf215546Sopenharmony_ci radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 3517bf215546Sopenharmony_ci radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3518bf215546Sopenharmony_ci radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3519bf215546Sopenharmony_ci radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3520bf215546Sopenharmony_ci 3521bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028068_DB_Z_READ_BASE_HI, 5); 3522bf215546Sopenharmony_ci radeon_emit(zb->db_depth_base >> 32); /* DB_Z_READ_BASE_HI */ 3523bf215546Sopenharmony_ci radeon_emit(zb->db_stencil_base >> 32); /* DB_STENCIL_READ_BASE_HI */ 3524bf215546Sopenharmony_ci radeon_emit(zb->db_depth_base >> 32); /* DB_Z_WRITE_BASE_HI */ 3525bf215546Sopenharmony_ci radeon_emit(zb->db_stencil_base >> 32); /* DB_STENCIL_WRITE_BASE_HI */ 3526bf215546Sopenharmony_ci radeon_emit(zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */ 3527bf215546Sopenharmony_ci } else if (sctx->gfx_level == GFX9) { 3528bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028014_DB_HTILE_DATA_BASE, 3); 3529bf215546Sopenharmony_ci radeon_emit(zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */ 3530bf215546Sopenharmony_ci radeon_emit(S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */ 3531bf215546Sopenharmony_ci radeon_emit(zb->db_depth_size); /* DB_DEPTH_SIZE */ 
3532bf215546Sopenharmony_ci 3533bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 10); 3534bf215546Sopenharmony_ci radeon_emit(db_z_info | /* DB_Z_INFO */ 3535bf215546Sopenharmony_ci S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 3536bf215546Sopenharmony_ci radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 3537bf215546Sopenharmony_ci radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 3538bf215546Sopenharmony_ci radeon_emit(S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */ 3539bf215546Sopenharmony_ci radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3540bf215546Sopenharmony_ci radeon_emit(S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ 3541bf215546Sopenharmony_ci radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3542bf215546Sopenharmony_ci radeon_emit(S_028054_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_WRITE_BASE_HI */ 3543bf215546Sopenharmony_ci radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3544bf215546Sopenharmony_ci radeon_emit(S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ 3545bf215546Sopenharmony_ci 3546bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028068_DB_Z_INFO2, 2); 3547bf215546Sopenharmony_ci radeon_emit(zb->db_z_info2); /* DB_Z_INFO2 */ 3548bf215546Sopenharmony_ci radeon_emit(zb->db_stencil_info2); /* DB_STENCIL_INFO2 */ 3549bf215546Sopenharmony_ci } else { 3550bf215546Sopenharmony_ci /* GFX6-GFX8 */ 3551bf215546Sopenharmony_ci /* Set fields dependent on tc_compatile_htile. */ 3552bf215546Sopenharmony_ci if (si_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) { 3553bf215546Sopenharmony_ci if (tex->tc_compatible_htile) { 3554bf215546Sopenharmony_ci db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 3555bf215546Sopenharmony_ci 3556bf215546Sopenharmony_ci /* 0 = full compression. N = only compress up to N-1 Z planes. 
*/ 3557bf215546Sopenharmony_ci if (tex->buffer.b.b.nr_samples <= 1) 3558bf215546Sopenharmony_ci db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 3559bf215546Sopenharmony_ci else if (tex->buffer.b.b.nr_samples <= 4) 3560bf215546Sopenharmony_ci db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 3561bf215546Sopenharmony_ci else 3562bf215546Sopenharmony_ci db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 3563bf215546Sopenharmony_ci } 3564bf215546Sopenharmony_ci } 3565bf215546Sopenharmony_ci 3566bf215546Sopenharmony_ci radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 3567bf215546Sopenharmony_ci 3568bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 9); 3569bf215546Sopenharmony_ci radeon_emit(zb->db_depth_info | /* DB_DEPTH_INFO */ 3570bf215546Sopenharmony_ci S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile)); 3571bf215546Sopenharmony_ci radeon_emit(db_z_info | /* DB_Z_INFO */ 3572bf215546Sopenharmony_ci S_028040_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 3573bf215546Sopenharmony_ci radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 3574bf215546Sopenharmony_ci radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 3575bf215546Sopenharmony_ci radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3576bf215546Sopenharmony_ci radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3577bf215546Sopenharmony_ci radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3578bf215546Sopenharmony_ci radeon_emit(zb->db_depth_size); /* DB_DEPTH_SIZE */ 3579bf215546Sopenharmony_ci radeon_emit(zb->db_depth_slice); /* DB_DEPTH_SLICE */ 3580bf215546Sopenharmony_ci } 3581bf215546Sopenharmony_ci 3582bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028028_DB_STENCIL_CLEAR, 2); 3583bf215546Sopenharmony_ci radeon_emit(tex->stencil_clear_value[level]); /* R_028028_DB_STENCIL_CLEAR */ 3584bf215546Sopenharmony_ci radeon_emit(fui(tex->depth_clear_value[level])); /* R_02802C_DB_DEPTH_CLEAR */ 
3585bf215546Sopenharmony_ci 3586bf215546Sopenharmony_ci radeon_set_context_reg(R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 3587bf215546Sopenharmony_ci radeon_set_context_reg(R_028ABC_DB_HTILE_SURFACE, db_htile_surface); 3588bf215546Sopenharmony_ci } else if (sctx->framebuffer.dirty_zsbuf) { 3589bf215546Sopenharmony_ci if (sctx->gfx_level == GFX9) 3590bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 2); 3591bf215546Sopenharmony_ci else 3592bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028040_DB_Z_INFO, 2); 3593bf215546Sopenharmony_ci 3594bf215546Sopenharmony_ci /* Gfx11 only: DB_Z_INFO.NUM_SAMPLES should always match the framebuffer samples. 3595bf215546Sopenharmony_ci * It affects VRS and occlusion queries if depth and stencil are not bound. 3596bf215546Sopenharmony_ci */ 3597bf215546Sopenharmony_ci radeon_emit(S_028040_FORMAT(V_028040_Z_INVALID) | /* DB_Z_INFO */ 3598bf215546Sopenharmony_ci S_028040_NUM_SAMPLES(sctx->gfx_level == GFX11 ? sctx->framebuffer.log_samples : 0)); 3599bf215546Sopenharmony_ci radeon_emit(S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ 3600bf215546Sopenharmony_ci } 3601bf215546Sopenharmony_ci 3602bf215546Sopenharmony_ci /* Framebuffer dimensions. 
*/ 3603bf215546Sopenharmony_ci /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_cs_preamble_state */ 3604bf215546Sopenharmony_ci radeon_set_context_reg(R_028208_PA_SC_WINDOW_SCISSOR_BR, 3605bf215546Sopenharmony_ci S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 3606bf215546Sopenharmony_ci 3607bf215546Sopenharmony_ci if (sctx->screen->dpbb_allowed) { 3608bf215546Sopenharmony_ci radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 3609bf215546Sopenharmony_ci radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 3610bf215546Sopenharmony_ci } 3611bf215546Sopenharmony_ci radeon_end(); 3612bf215546Sopenharmony_ci 3613bf215546Sopenharmony_ci si_update_display_dcc_dirty(sctx); 3614bf215546Sopenharmony_ci 3615bf215546Sopenharmony_ci sctx->framebuffer.dirty_cbufs = 0; 3616bf215546Sopenharmony_ci sctx->framebuffer.dirty_zsbuf = false; 3617bf215546Sopenharmony_ci} 3618bf215546Sopenharmony_ci 3619bf215546Sopenharmony_cistatic void si_emit_msaa_sample_locs(struct si_context *sctx) 3620bf215546Sopenharmony_ci{ 3621bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 3622bf215546Sopenharmony_ci struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 3623bf215546Sopenharmony_ci unsigned nr_samples = sctx->framebuffer.nr_samples; 3624bf215546Sopenharmony_ci bool has_msaa_sample_loc_bug = sctx->screen->info.has_msaa_sample_loc_bug; 3625bf215546Sopenharmony_ci 3626bf215546Sopenharmony_ci /* Smoothing (only possible with nr_samples == 1) uses the same 3627bf215546Sopenharmony_ci * sample locations as the MSAA it simulates. 3628bf215546Sopenharmony_ci */ 3629bf215546Sopenharmony_ci if (nr_samples <= 1 && sctx->smoothing_enabled) 3630bf215546Sopenharmony_ci nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 3631bf215546Sopenharmony_ci 3632bf215546Sopenharmony_ci /* On Polaris, the small primitive filter uses the sample locations 3633bf215546Sopenharmony_ci * even when MSAA is off, so we need to make sure they're set to 0. 
3634bf215546Sopenharmony_ci * 3635bf215546Sopenharmony_ci * GFX10 uses sample locations unconditionally, so they always need 3636bf215546Sopenharmony_ci * to be set up. 3637bf215546Sopenharmony_ci */ 3638bf215546Sopenharmony_ci if ((nr_samples >= 2 || has_msaa_sample_loc_bug || sctx->gfx_level >= GFX10) && 3639bf215546Sopenharmony_ci nr_samples != sctx->sample_locs_num_samples) { 3640bf215546Sopenharmony_ci sctx->sample_locs_num_samples = nr_samples; 3641bf215546Sopenharmony_ci si_emit_sample_locations(cs, nr_samples); 3642bf215546Sopenharmony_ci } 3643bf215546Sopenharmony_ci 3644bf215546Sopenharmony_ci radeon_begin(cs); 3645bf215546Sopenharmony_ci 3646bf215546Sopenharmony_ci if (sctx->family >= CHIP_POLARIS10) { 3647bf215546Sopenharmony_ci unsigned small_prim_filter_cntl = 3648bf215546Sopenharmony_ci S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 3649bf215546Sopenharmony_ci /* line bug */ 3650bf215546Sopenharmony_ci S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12); 3651bf215546Sopenharmony_ci 3652bf215546Sopenharmony_ci /* For hardware with the sample location bug, the problem is that in order to use the small 3653bf215546Sopenharmony_ci * primitive filter, we need to explicitly set the sample locations to 0. But the DB doesn't 3654bf215546Sopenharmony_ci * properly process the change of sample locations without a flush, and so we can end up 3655bf215546Sopenharmony_ci * with incorrect Z values. 3656bf215546Sopenharmony_ci * 3657bf215546Sopenharmony_ci * Instead of doing a flush, just disable the small primitive filter when MSAA is 3658bf215546Sopenharmony_ci * force-disabled. 
3659bf215546Sopenharmony_ci * 3660bf215546Sopenharmony_ci * The alternative of setting sample locations to 0 would require a DB flush to avoid 3661bf215546Sopenharmony_ci * Z errors, see https://bugs.freedesktop.org/show_bug.cgi?id=96908 3662bf215546Sopenharmony_ci */ 3663bf215546Sopenharmony_ci if (has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1 && !rs->multisample_enable) 3664bf215546Sopenharmony_ci small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 3665bf215546Sopenharmony_ci 3666bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 3667bf215546Sopenharmony_ci SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl); 3668bf215546Sopenharmony_ci } 3669bf215546Sopenharmony_ci 3670bf215546Sopenharmony_ci /* The exclusion bits can be set to improve rasterization efficiency 3671bf215546Sopenharmony_ci * if no sample lies on the pixel boundary (-8 sample offset). 3672bf215546Sopenharmony_ci */ 3673bf215546Sopenharmony_ci bool exclusion = sctx->gfx_level >= GFX7 && (!rs->multisample_enable || nr_samples != 16); 3674bf215546Sopenharmony_ci radeon_opt_set_context_reg( 3675bf215546Sopenharmony_ci sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, 3676bf215546Sopenharmony_ci S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); 3677bf215546Sopenharmony_ci radeon_end(); 3678bf215546Sopenharmony_ci} 3679bf215546Sopenharmony_ci 3680bf215546Sopenharmony_cistatic bool si_out_of_order_rasterization(struct si_context *sctx) 3681bf215546Sopenharmony_ci{ 3682bf215546Sopenharmony_ci struct si_state_blend *blend = sctx->queued.named.blend; 3683bf215546Sopenharmony_ci struct si_state_dsa *dsa = sctx->queued.named.dsa; 3684bf215546Sopenharmony_ci 3685bf215546Sopenharmony_ci if (!sctx->screen->has_out_of_order_rast) 3686bf215546Sopenharmony_ci return false; 3687bf215546Sopenharmony_ci 3688bf215546Sopenharmony_ci unsigned colormask = 
sctx->framebuffer.colorbuf_enabled_4bit; 3689bf215546Sopenharmony_ci 3690bf215546Sopenharmony_ci colormask &= blend->cb_target_enabled_4bit; 3691bf215546Sopenharmony_ci 3692bf215546Sopenharmony_ci /* Conservative: No logic op. */ 3693bf215546Sopenharmony_ci if (colormask && blend->logicop_enable) 3694bf215546Sopenharmony_ci return false; 3695bf215546Sopenharmony_ci 3696bf215546Sopenharmony_ci struct si_dsa_order_invariance dsa_order_invariant = {.zs = true, 3697bf215546Sopenharmony_ci .pass_set = true}; 3698bf215546Sopenharmony_ci 3699bf215546Sopenharmony_ci if (sctx->framebuffer.state.zsbuf) { 3700bf215546Sopenharmony_ci struct si_texture *zstex = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture; 3701bf215546Sopenharmony_ci bool has_stencil = zstex->surface.has_stencil; 3702bf215546Sopenharmony_ci dsa_order_invariant = dsa->order_invariance[has_stencil]; 3703bf215546Sopenharmony_ci if (!dsa_order_invariant.zs) 3704bf215546Sopenharmony_ci return false; 3705bf215546Sopenharmony_ci 3706bf215546Sopenharmony_ci /* The set of PS invocations is always order invariant, 3707bf215546Sopenharmony_ci * except when early Z/S tests are requested. 
*/ 3708bf215546Sopenharmony_ci if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.writes_memory && 3709bf215546Sopenharmony_ci sctx->shader.ps.cso->info.base.fs.early_fragment_tests && 3710bf215546Sopenharmony_ci !dsa_order_invariant.pass_set) 3711bf215546Sopenharmony_ci return false; 3712bf215546Sopenharmony_ci 3713bf215546Sopenharmony_ci if (sctx->num_perfect_occlusion_queries != 0 && !dsa_order_invariant.pass_set) 3714bf215546Sopenharmony_ci return false; 3715bf215546Sopenharmony_ci } 3716bf215546Sopenharmony_ci 3717bf215546Sopenharmony_ci if (!colormask) 3718bf215546Sopenharmony_ci return true; 3719bf215546Sopenharmony_ci 3720bf215546Sopenharmony_ci unsigned blendmask = colormask & blend->blend_enable_4bit; 3721bf215546Sopenharmony_ci 3722bf215546Sopenharmony_ci if (blendmask) { 3723bf215546Sopenharmony_ci /* Only commutative blending. */ 3724bf215546Sopenharmony_ci if (blendmask & ~blend->commutative_4bit) 3725bf215546Sopenharmony_ci return false; 3726bf215546Sopenharmony_ci 3727bf215546Sopenharmony_ci if (!dsa_order_invariant.pass_set) 3728bf215546Sopenharmony_ci return false; 3729bf215546Sopenharmony_ci } 3730bf215546Sopenharmony_ci 3731bf215546Sopenharmony_ci if (colormask & ~blendmask) 3732bf215546Sopenharmony_ci return false; 3733bf215546Sopenharmony_ci 3734bf215546Sopenharmony_ci return true; 3735bf215546Sopenharmony_ci} 3736bf215546Sopenharmony_ci 3737bf215546Sopenharmony_cistatic void si_emit_msaa_config(struct si_context *sctx) 3738bf215546Sopenharmony_ci{ 3739bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 3740bf215546Sopenharmony_ci unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes; 3741bf215546Sopenharmony_ci /* 33% faster rendering to linear color buffers */ 3742bf215546Sopenharmony_ci bool dst_is_linear = sctx->framebuffer.any_dst_linear; 3743bf215546Sopenharmony_ci bool out_of_order_rast = si_out_of_order_rasterization(sctx); 3744bf215546Sopenharmony_ci unsigned sc_mode_cntl_1 = 3745bf215546Sopenharmony_ci 
S_028A4C_WALK_SIZE(dst_is_linear) | S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 3746bf215546Sopenharmony_ci S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 3747bf215546Sopenharmony_ci S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) | 3748bf215546Sopenharmony_ci S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) | 3749bf215546Sopenharmony_ci /* always 1: */ 3750bf215546Sopenharmony_ci S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 3751bf215546Sopenharmony_ci S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 3752bf215546Sopenharmony_ci S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1); 3753bf215546Sopenharmony_ci unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) | 3754bf215546Sopenharmony_ci S_028804_INTERPOLATE_COMP_Z(sctx->gfx_level < GFX11) | 3755bf215546Sopenharmony_ci S_028804_STATIC_ANCHOR_ASSOCIATIONS(1); 3756bf215546Sopenharmony_ci unsigned coverage_samples, z_samples; 3757bf215546Sopenharmony_ci struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 3758bf215546Sopenharmony_ci 3759bf215546Sopenharmony_ci /* S: Coverage samples (up to 16x): 3760bf215546Sopenharmony_ci * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES) 3761bf215546Sopenharmony_ci * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES) 3762bf215546Sopenharmony_ci * 3763bf215546Sopenharmony_ci * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples): 3764bf215546Sopenharmony_ci * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES) 3765bf215546Sopenharmony_ci * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES) 3766bf215546Sopenharmony_ci * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or 3767bf215546Sopenharmony_ci * # from the closest defined sample if Z is uncompressed (same quality as the number of 3768bf215546Sopenharmony_ci * # Z samples). 
3769bf215546Sopenharmony_ci * 3770bf215546Sopenharmony_ci * F: Color samples (up to 8x, must be <= coverage samples): 3771bf215546Sopenharmony_ci * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS) 3772bf215546Sopenharmony_ci * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES) 3773bf215546Sopenharmony_ci * 3774bf215546Sopenharmony_ci * Can be anything between coverage and color samples: 3775bf215546Sopenharmony_ci * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES) 3776bf215546Sopenharmony_ci * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES) 3777bf215546Sopenharmony_ci * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES) 3778bf215546Sopenharmony_ci * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE) 3779bf215546Sopenharmony_ci * # All are currently set the same as coverage samples. 3780bf215546Sopenharmony_ci * 3781bf215546Sopenharmony_ci * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown" 3782bf215546Sopenharmony_ci * flag for undefined color samples. A shader-based resolve must handle unknowns 3783bf215546Sopenharmony_ci * or mask them out with AND. Unknowns can also be guessed from neighbors via 3784bf215546Sopenharmony_ci * an edge-detect shader-based resolve, which is required to make "color samples = 1" 3785bf215546Sopenharmony_ci * useful. The CB resolve always drops unknowns. 
3786bf215546Sopenharmony_ci * 3787bf215546Sopenharmony_ci * Sensible AA configurations: 3788bf215546Sopenharmony_ci * EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed 3789bf215546Sopenharmony_ci * EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed 3790bf215546Sopenharmony_ci * EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed 3791bf215546Sopenharmony_ci * EQAA 8s 8z 8f = 8x MSAA 3792bf215546Sopenharmony_ci * EQAA 8s 8z 4f - might look the same as 8x MSAA 3793bf215546Sopenharmony_ci * EQAA 8s 8z 2f - might look the same as 8x MSAA with low-density geometry 3794bf215546Sopenharmony_ci * EQAA 8s 4z 4f - might look the same as 8x MSAA if Z is compressed 3795bf215546Sopenharmony_ci * EQAA 8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed 3796bf215546Sopenharmony_ci * EQAA 4s 4z 4f = 4x MSAA 3797bf215546Sopenharmony_ci * EQAA 4s 4z 2f - might look the same as 4x MSAA with low-density geometry 3798bf215546Sopenharmony_ci * EQAA 2s 2z 2f = 2x MSAA 3799bf215546Sopenharmony_ci */ 3800bf215546Sopenharmony_ci coverage_samples = si_get_num_coverage_samples(sctx); 3801bf215546Sopenharmony_ci 3802bf215546Sopenharmony_ci /* The DX10 diamond test is not required by GL and decreases line rasterization 3803bf215546Sopenharmony_ci * performance, so don't use it. 
3804bf215546Sopenharmony_ci */ 3805bf215546Sopenharmony_ci unsigned sc_line_cntl = 0; 3806bf215546Sopenharmony_ci unsigned sc_aa_config = 0; 3807bf215546Sopenharmony_ci 3808bf215546Sopenharmony_ci if (coverage_samples > 1 && rs->multisample_enable) { 3809bf215546Sopenharmony_ci /* distance from the pixel center, indexed by log2(nr_samples) */ 3810bf215546Sopenharmony_ci static unsigned max_dist[] = { 3811bf215546Sopenharmony_ci 0, /* unused */ 3812bf215546Sopenharmony_ci 4, /* 2x MSAA */ 3813bf215546Sopenharmony_ci 6, /* 4x MSAA */ 3814bf215546Sopenharmony_ci 7, /* 8x MSAA */ 3815bf215546Sopenharmony_ci 8, /* 16x MSAA */ 3816bf215546Sopenharmony_ci }; 3817bf215546Sopenharmony_ci unsigned log_samples = util_logbase2(coverage_samples); 3818bf215546Sopenharmony_ci 3819bf215546Sopenharmony_ci sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1) | 3820bf215546Sopenharmony_ci S_028BDC_PERPENDICULAR_ENDCAP_ENA(rs->perpendicular_end_caps) | 3821bf215546Sopenharmony_ci S_028BDC_EXTRA_DX_DY_PRECISION(rs->perpendicular_end_caps && 3822bf215546Sopenharmony_ci (sctx->family == CHIP_VEGA20 || 3823bf215546Sopenharmony_ci sctx->gfx_level >= GFX10)); 3824bf215546Sopenharmony_ci sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 3825bf215546Sopenharmony_ci S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | 3826bf215546Sopenharmony_ci S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | 3827bf215546Sopenharmony_ci S_028BE0_COVERED_CENTROID_IS_CENTER(sctx->gfx_level >= GFX10_3); 3828bf215546Sopenharmony_ci } 3829bf215546Sopenharmony_ci 3830bf215546Sopenharmony_ci if (sctx->framebuffer.nr_samples > 1) { 3831bf215546Sopenharmony_ci if (sctx->framebuffer.state.zsbuf) { 3832bf215546Sopenharmony_ci z_samples = sctx->framebuffer.state.zsbuf->texture->nr_samples; 3833bf215546Sopenharmony_ci z_samples = MAX2(1, z_samples); 3834bf215546Sopenharmony_ci } else { 3835bf215546Sopenharmony_ci z_samples = coverage_samples; 3836bf215546Sopenharmony_ci } 3837bf215546Sopenharmony_ci unsigned log_samples = 
util_logbase2(coverage_samples); 3838bf215546Sopenharmony_ci unsigned log_z_samples = util_logbase2(z_samples); 3839bf215546Sopenharmony_ci unsigned ps_iter_samples = si_get_ps_iter_samples(sctx); 3840bf215546Sopenharmony_ci unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples); 3841bf215546Sopenharmony_ci if (sctx->framebuffer.nr_samples > 1) { 3842bf215546Sopenharmony_ci db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | 3843bf215546Sopenharmony_ci S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 3844bf215546Sopenharmony_ci S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 3845bf215546Sopenharmony_ci S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); 3846bf215546Sopenharmony_ci sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); 3847bf215546Sopenharmony_ci } else if (sctx->smoothing_enabled) { 3848bf215546Sopenharmony_ci db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples); 3849bf215546Sopenharmony_ci } 3850bf215546Sopenharmony_ci } 3851bf215546Sopenharmony_ci 3852bf215546Sopenharmony_ci radeon_begin(cs); 3853bf215546Sopenharmony_ci 3854bf215546Sopenharmony_ci /* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */ 3855bf215546Sopenharmony_ci radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL, 3856bf215546Sopenharmony_ci sc_line_cntl, sc_aa_config); 3857bf215546Sopenharmony_ci /* R_028804_DB_EQAA */ 3858bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa); 3859bf215546Sopenharmony_ci /* R_028A4C_PA_SC_MODE_CNTL_1 */ 3860bf215546Sopenharmony_ci radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, 3861bf215546Sopenharmony_ci sc_mode_cntl_1); 3862bf215546Sopenharmony_ci radeon_end_update_context_roll(sctx); 3863bf215546Sopenharmony_ci} 3864bf215546Sopenharmony_ci 3865bf215546Sopenharmony_civoid si_update_ps_iter_samples(struct si_context *sctx) 3866bf215546Sopenharmony_ci{ 3867bf215546Sopenharmony_ci if 
(sctx->framebuffer.nr_samples > 1) 3868bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3869bf215546Sopenharmony_ci if (sctx->screen->dpbb_allowed) 3870bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 3871bf215546Sopenharmony_ci} 3872bf215546Sopenharmony_ci 3873bf215546Sopenharmony_cistatic void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 3874bf215546Sopenharmony_ci{ 3875bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 3876bf215546Sopenharmony_ci 3877bf215546Sopenharmony_ci /* The hardware can only do sample shading with 2^n samples. */ 3878bf215546Sopenharmony_ci min_samples = util_next_power_of_two(min_samples); 3879bf215546Sopenharmony_ci 3880bf215546Sopenharmony_ci if (sctx->ps_iter_samples == min_samples) 3881bf215546Sopenharmony_ci return; 3882bf215546Sopenharmony_ci 3883bf215546Sopenharmony_ci sctx->ps_iter_samples = min_samples; 3884bf215546Sopenharmony_ci 3885bf215546Sopenharmony_ci si_ps_key_update_sample_shading(sctx); 3886bf215546Sopenharmony_ci si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 3887bf215546Sopenharmony_ci sctx->do_update_shaders = true; 3888bf215546Sopenharmony_ci 3889bf215546Sopenharmony_ci si_update_ps_iter_samples(sctx); 3890bf215546Sopenharmony_ci} 3891bf215546Sopenharmony_ci 3892bf215546Sopenharmony_ci/* 3893bf215546Sopenharmony_ci * Samplers 3894bf215546Sopenharmony_ci */ 3895bf215546Sopenharmony_ci 3896bf215546Sopenharmony_ci/** 3897bf215546Sopenharmony_ci * Build the sampler view descriptor for a buffer texture. 
3898bf215546Sopenharmony_ci * @param state 256-bit descriptor; only the high 128 bits are filled in 3899bf215546Sopenharmony_ci */ 3900bf215546Sopenharmony_civoid si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, 3901bf215546Sopenharmony_ci enum pipe_format format, unsigned offset, unsigned num_elements, 3902bf215546Sopenharmony_ci uint32_t *state) 3903bf215546Sopenharmony_ci{ 3904bf215546Sopenharmony_ci const struct util_format_description *desc; 3905bf215546Sopenharmony_ci unsigned stride; 3906bf215546Sopenharmony_ci unsigned num_records; 3907bf215546Sopenharmony_ci 3908bf215546Sopenharmony_ci desc = util_format_description(format); 3909bf215546Sopenharmony_ci stride = desc->block.bits / 8; 3910bf215546Sopenharmony_ci 3911bf215546Sopenharmony_ci num_records = num_elements; 3912bf215546Sopenharmony_ci num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 3913bf215546Sopenharmony_ci 3914bf215546Sopenharmony_ci /* The NUM_RECORDS field has a different meaning depending on the chip, 3915bf215546Sopenharmony_ci * instruction type, STRIDE, and SWIZZLE_ENABLE. 3916bf215546Sopenharmony_ci * 3917bf215546Sopenharmony_ci * GFX6-7,10: 3918bf215546Sopenharmony_ci * - If STRIDE == 0, it's in byte units. 3919bf215546Sopenharmony_ci * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. 3920bf215546Sopenharmony_ci * 3921bf215546Sopenharmony_ci * GFX8: 3922bf215546Sopenharmony_ci * - For SMEM and STRIDE == 0, it's in byte units. 3923bf215546Sopenharmony_ci * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3924bf215546Sopenharmony_ci * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. 3925bf215546Sopenharmony_ci * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE. 3926bf215546Sopenharmony_ci * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_- 3927bf215546Sopenharmony_ci * ENABLE. 
The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when 3928bf215546Sopenharmony_ci * using SMEM. This can be done in the shader by clearing STRIDE with s_and. 3929bf215546Sopenharmony_ci * That way the same descriptor can be used by both SMEM and VMEM. 3930bf215546Sopenharmony_ci * 3931bf215546Sopenharmony_ci * GFX9: 3932bf215546Sopenharmony_ci * - For SMEM and STRIDE == 0, it's in byte units. 3933bf215546Sopenharmony_ci * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3934bf215546Sopenharmony_ci * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. 3935bf215546Sopenharmony_ci * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. 3936bf215546Sopenharmony_ci */ 3937bf215546Sopenharmony_ci if (screen->info.gfx_level == GFX8) 3938bf215546Sopenharmony_ci num_records *= stride; 3939bf215546Sopenharmony_ci 3940bf215546Sopenharmony_ci state[4] = 0; 3941bf215546Sopenharmony_ci state[5] = S_008F04_STRIDE(stride); 3942bf215546Sopenharmony_ci state[6] = num_records; 3943bf215546Sopenharmony_ci state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3944bf215546Sopenharmony_ci S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3945bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3946bf215546Sopenharmony_ci S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])); 3947bf215546Sopenharmony_ci 3948bf215546Sopenharmony_ci if (screen->info.gfx_level >= GFX10) { 3949bf215546Sopenharmony_ci const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&screen->info)[format]; 3950bf215546Sopenharmony_ci 3951bf215546Sopenharmony_ci /* OOB_SELECT chooses the out-of-bounds check: 3952bf215546Sopenharmony_ci * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE) 3953bf215546Sopenharmony_ci * - 1: index >= NUM_RECORDS 3954bf215546Sopenharmony_ci * - 2: NUM_RECORDS == 0 3955bf215546Sopenharmony_ci * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS 3956bf215546Sopenharmony_ci * else: swizzle_address >= 
NUM_RECORDS 3957bf215546Sopenharmony_ci */ 3958bf215546Sopenharmony_ci state[7] |= S_008F0C_FORMAT(fmt->img_format) | 3959bf215546Sopenharmony_ci S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | 3960bf215546Sopenharmony_ci S_008F0C_RESOURCE_LEVEL(screen->info.gfx_level < GFX11); 3961bf215546Sopenharmony_ci } else { 3962bf215546Sopenharmony_ci int first_non_void; 3963bf215546Sopenharmony_ci unsigned num_format, data_format; 3964bf215546Sopenharmony_ci 3965bf215546Sopenharmony_ci first_non_void = util_format_get_first_non_void_channel(format); 3966bf215546Sopenharmony_ci num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void); 3967bf215546Sopenharmony_ci data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void); 3968bf215546Sopenharmony_ci 3969bf215546Sopenharmony_ci state[7] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); 3970bf215546Sopenharmony_ci } 3971bf215546Sopenharmony_ci} 3972bf215546Sopenharmony_ci 3973bf215546Sopenharmony_cistatic unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4]) 3974bf215546Sopenharmony_ci{ 3975bf215546Sopenharmony_ci unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3976bf215546Sopenharmony_ci 3977bf215546Sopenharmony_ci if (swizzle[3] == PIPE_SWIZZLE_X) { 3978bf215546Sopenharmony_ci /* For the pre-defined border color values (white, opaque 3979bf215546Sopenharmony_ci * black, transparent black), the only thing that matters is 3980bf215546Sopenharmony_ci * that the alpha channel winds up in the correct place 3981bf215546Sopenharmony_ci * (because the RGB channels are all the same) so either of 3982bf215546Sopenharmony_ci * these enumerations will work. 
3983bf215546Sopenharmony_ci */ 3984bf215546Sopenharmony_ci if (swizzle[2] == PIPE_SWIZZLE_Y) 3985bf215546Sopenharmony_ci bc_swizzle = V_008F20_BC_SWIZZLE_WZYX; 3986bf215546Sopenharmony_ci else 3987bf215546Sopenharmony_ci bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ; 3988bf215546Sopenharmony_ci } else if (swizzle[0] == PIPE_SWIZZLE_X) { 3989bf215546Sopenharmony_ci if (swizzle[1] == PIPE_SWIZZLE_Y) 3990bf215546Sopenharmony_ci bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3991bf215546Sopenharmony_ci else 3992bf215546Sopenharmony_ci bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ; 3993bf215546Sopenharmony_ci } else if (swizzle[1] == PIPE_SWIZZLE_X) { 3994bf215546Sopenharmony_ci bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ; 3995bf215546Sopenharmony_ci } else if (swizzle[2] == PIPE_SWIZZLE_X) { 3996bf215546Sopenharmony_ci bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW; 3997bf215546Sopenharmony_ci } 3998bf215546Sopenharmony_ci 3999bf215546Sopenharmony_ci return bc_swizzle; 4000bf215546Sopenharmony_ci} 4001bf215546Sopenharmony_ci 4002bf215546Sopenharmony_ci/** 4003bf215546Sopenharmony_ci * Build the sampler view descriptor for a texture. 
4004bf215546Sopenharmony_ci */ 4005bf215546Sopenharmony_cistatic void gfx10_make_texture_descriptor( 4006bf215546Sopenharmony_ci struct si_screen *screen, struct si_texture *tex, bool sampler, enum pipe_texture_target target, 4007bf215546Sopenharmony_ci enum pipe_format pipe_format, const unsigned char state_swizzle[4], unsigned first_level, 4008bf215546Sopenharmony_ci unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height, 4009bf215546Sopenharmony_ci unsigned depth, uint32_t *state, uint32_t *fmask_state) 4010bf215546Sopenharmony_ci{ 4011bf215546Sopenharmony_ci struct pipe_resource *res = &tex->buffer.b.b; 4012bf215546Sopenharmony_ci const struct util_format_description *desc; 4013bf215546Sopenharmony_ci unsigned img_format; 4014bf215546Sopenharmony_ci unsigned char swizzle[4]; 4015bf215546Sopenharmony_ci unsigned type; 4016bf215546Sopenharmony_ci uint64_t va; 4017bf215546Sopenharmony_ci 4018bf215546Sopenharmony_ci desc = util_format_description(pipe_format); 4019bf215546Sopenharmony_ci img_format = ac_get_gfx10_format_table(&screen->info)[pipe_format].img_format; 4020bf215546Sopenharmony_ci 4021bf215546Sopenharmony_ci if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 4022bf215546Sopenharmony_ci const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 4023bf215546Sopenharmony_ci const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 4024bf215546Sopenharmony_ci const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 4025bf215546Sopenharmony_ci bool is_stencil = false; 4026bf215546Sopenharmony_ci 4027bf215546Sopenharmony_ci switch (pipe_format) { 4028bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4029bf215546Sopenharmony_ci case PIPE_FORMAT_X32_S8X24_UINT: 4030bf215546Sopenharmony_ci case PIPE_FORMAT_X8Z24_UNORM: 4031bf215546Sopenharmony_ci util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 4032bf215546Sopenharmony_ci is_stencil = true; 4033bf215546Sopenharmony_ci break; 4034bf215546Sopenharmony_ci case 
PIPE_FORMAT_X24S8_UINT: 4035bf215546Sopenharmony_ci /* 4036bf215546Sopenharmony_ci * X24S8 is implemented as an 8_8_8_8 data format, to 4037bf215546Sopenharmony_ci * fix texture gathers. This affects at least 4038bf215546Sopenharmony_ci * GL45-CTS.texture_cube_map_array.sampling on GFX8. 4039bf215546Sopenharmony_ci */ 4040bf215546Sopenharmony_ci util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 4041bf215546Sopenharmony_ci is_stencil = true; 4042bf215546Sopenharmony_ci break; 4043bf215546Sopenharmony_ci default: 4044bf215546Sopenharmony_ci util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 4045bf215546Sopenharmony_ci is_stencil = pipe_format == PIPE_FORMAT_S8_UINT; 4046bf215546Sopenharmony_ci } 4047bf215546Sopenharmony_ci 4048bf215546Sopenharmony_ci if (tex->upgraded_depth && !is_stencil) { 4049bf215546Sopenharmony_ci if (screen->info.gfx_level >= GFX11) { 4050bf215546Sopenharmony_ci assert(img_format == V_008F0C_GFX11_FORMAT_32_FLOAT); 4051bf215546Sopenharmony_ci img_format = V_008F0C_GFX11_FORMAT_32_FLOAT_CLAMP; 4052bf215546Sopenharmony_ci } else { 4053bf215546Sopenharmony_ci assert(img_format == V_008F0C_GFX10_FORMAT_32_FLOAT); 4054bf215546Sopenharmony_ci img_format = V_008F0C_GFX10_FORMAT_32_FLOAT_CLAMP; 4055bf215546Sopenharmony_ci } 4056bf215546Sopenharmony_ci } 4057bf215546Sopenharmony_ci } else { 4058bf215546Sopenharmony_ci util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 4059bf215546Sopenharmony_ci } 4060bf215546Sopenharmony_ci 4061bf215546Sopenharmony_ci if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY)) { 4062bf215546Sopenharmony_ci /* For the purpose of shader images, treat cube maps as 2D 4063bf215546Sopenharmony_ci * arrays. 
4064bf215546Sopenharmony_ci */ 4065bf215546Sopenharmony_ci type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 4066bf215546Sopenharmony_ci } else { 4067bf215546Sopenharmony_ci type = si_tex_dim(screen, tex, target, res->nr_samples); 4068bf215546Sopenharmony_ci } 4069bf215546Sopenharmony_ci 4070bf215546Sopenharmony_ci if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 4071bf215546Sopenharmony_ci height = 1; 4072bf215546Sopenharmony_ci depth = res->array_size; 4073bf215546Sopenharmony_ci } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 4074bf215546Sopenharmony_ci if (sampler || res->target != PIPE_TEXTURE_3D) 4075bf215546Sopenharmony_ci depth = res->array_size; 4076bf215546Sopenharmony_ci } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 4077bf215546Sopenharmony_ci depth = res->array_size / 6; 4078bf215546Sopenharmony_ci 4079bf215546Sopenharmony_ci state[0] = 0; 4080bf215546Sopenharmony_ci state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1); 4081bf215546Sopenharmony_ci state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 4082bf215546Sopenharmony_ci S_00A008_RESOURCE_LEVEL(screen->info.gfx_level < GFX11); 4083bf215546Sopenharmony_ci 4084bf215546Sopenharmony_ci state[3] = 4085bf215546Sopenharmony_ci S_00A00C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 4086bf215546Sopenharmony_ci S_00A00C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 4087bf215546Sopenharmony_ci S_00A00C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 4088bf215546Sopenharmony_ci S_00A00C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 4089bf215546Sopenharmony_ci S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) | 4090bf215546Sopenharmony_ci S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) | 4091bf215546Sopenharmony_ci S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type); 4092bf215546Sopenharmony_ci /* Depth is the the last accessible layer on gfx9+. 
The hw doesn't need 4093bf215546Sopenharmony_ci * to know the total number of layers. 4094bf215546Sopenharmony_ci */ 4095bf215546Sopenharmony_ci state[4] = 4096bf215546Sopenharmony_ci S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) | 4097bf215546Sopenharmony_ci S_00A010_BASE_ARRAY(first_layer); 4098bf215546Sopenharmony_ci state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) | 4099bf215546Sopenharmony_ci S_00A014_PERF_MOD(4); 4100bf215546Sopenharmony_ci 4101bf215546Sopenharmony_ci unsigned max_mip = res->nr_samples > 1 ? util_logbase2(res->nr_samples) : 4102bf215546Sopenharmony_ci tex->buffer.b.b.last_level; 4103bf215546Sopenharmony_ci 4104bf215546Sopenharmony_ci if (screen->info.gfx_level >= GFX11) { 4105bf215546Sopenharmony_ci state[1] |= S_00A004_MAX_MIP(max_mip); 4106bf215546Sopenharmony_ci } else { 4107bf215546Sopenharmony_ci state[5] |= S_00A014_MAX_MIP(max_mip); 4108bf215546Sopenharmony_ci } 4109bf215546Sopenharmony_ci state[6] = 0; 4110bf215546Sopenharmony_ci state[7] = 0; 4111bf215546Sopenharmony_ci 4112bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, first_level)) { 4113bf215546Sopenharmony_ci state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | 4114bf215546Sopenharmony_ci S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) | 4115bf215546Sopenharmony_ci S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); 4116bf215546Sopenharmony_ci } 4117bf215546Sopenharmony_ci 4118bf215546Sopenharmony_ci /* Initialize the sampler view for FMASK. 
*/ 4119bf215546Sopenharmony_ci if (tex->surface.fmask_offset) { 4120bf215546Sopenharmony_ci uint32_t format; 4121bf215546Sopenharmony_ci 4122bf215546Sopenharmony_ci va = tex->buffer.gpu_address + tex->surface.fmask_offset; 4123bf215546Sopenharmony_ci 4124bf215546Sopenharmony_ci#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) 4125bf215546Sopenharmony_ci switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 4126bf215546Sopenharmony_ci case FMASK(2, 1): 4127bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F1; 4128bf215546Sopenharmony_ci break; 4129bf215546Sopenharmony_ci case FMASK(2, 2): 4130bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2; 4131bf215546Sopenharmony_ci break; 4132bf215546Sopenharmony_ci case FMASK(4, 1): 4133bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F1; 4134bf215546Sopenharmony_ci break; 4135bf215546Sopenharmony_ci case FMASK(4, 2): 4136bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F2; 4137bf215546Sopenharmony_ci break; 4138bf215546Sopenharmony_ci case FMASK(4, 4): 4139bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4; 4140bf215546Sopenharmony_ci break; 4141bf215546Sopenharmony_ci case FMASK(8, 1): 4142bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK8_S8_F1; 4143bf215546Sopenharmony_ci break; 4144bf215546Sopenharmony_ci case FMASK(8, 2): 4145bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK16_S8_F2; 4146bf215546Sopenharmony_ci break; 4147bf215546Sopenharmony_ci case FMASK(8, 4): 4148bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F4; 4149bf215546Sopenharmony_ci break; 4150bf215546Sopenharmony_ci case FMASK(8, 8): 4151bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8; 4152bf215546Sopenharmony_ci break; 4153bf215546Sopenharmony_ci case FMASK(16, 1): 4154bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK16_S16_F1; 4155bf215546Sopenharmony_ci break; 
4156bf215546Sopenharmony_ci case FMASK(16, 2): 4157bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK32_S16_F2; 4158bf215546Sopenharmony_ci break; 4159bf215546Sopenharmony_ci case FMASK(16, 4): 4160bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F4; 4161bf215546Sopenharmony_ci break; 4162bf215546Sopenharmony_ci case FMASK(16, 8): 4163bf215546Sopenharmony_ci format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F8; 4164bf215546Sopenharmony_ci break; 4165bf215546Sopenharmony_ci default: 4166bf215546Sopenharmony_ci unreachable("invalid nr_samples"); 4167bf215546Sopenharmony_ci } 4168bf215546Sopenharmony_ci#undef FMASK 4169bf215546Sopenharmony_ci fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle; 4170bf215546Sopenharmony_ci fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) | 4171bf215546Sopenharmony_ci S_00A004_WIDTH_LO(width - 1); 4172bf215546Sopenharmony_ci fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 4173bf215546Sopenharmony_ci S_00A008_RESOURCE_LEVEL(1); 4174bf215546Sopenharmony_ci fmask_state[3] = 4175bf215546Sopenharmony_ci S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 4176bf215546Sopenharmony_ci S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 4177bf215546Sopenharmony_ci S_00A00C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 4178bf215546Sopenharmony_ci S_00A00C_TYPE(si_tex_dim(screen, tex, target, 0)); 4179bf215546Sopenharmony_ci fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer); 4180bf215546Sopenharmony_ci fmask_state[5] = 0; 4181bf215546Sopenharmony_ci fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); 4182bf215546Sopenharmony_ci fmask_state[7] = 0; 4183bf215546Sopenharmony_ci } 4184bf215546Sopenharmony_ci} 4185bf215546Sopenharmony_ci 4186bf215546Sopenharmony_ci/** 4187bf215546Sopenharmony_ci * Build the sampler view descriptor for a texture (SI-GFX9). 
4188bf215546Sopenharmony_ci */ 4189bf215546Sopenharmony_cistatic void si_make_texture_descriptor(struct si_screen *screen, struct si_texture *tex, 4190bf215546Sopenharmony_ci bool sampler, enum pipe_texture_target target, 4191bf215546Sopenharmony_ci enum pipe_format pipe_format, 4192bf215546Sopenharmony_ci const unsigned char state_swizzle[4], unsigned first_level, 4193bf215546Sopenharmony_ci unsigned last_level, unsigned first_layer, 4194bf215546Sopenharmony_ci unsigned last_layer, unsigned width, unsigned height, 4195bf215546Sopenharmony_ci unsigned depth, uint32_t *state, uint32_t *fmask_state) 4196bf215546Sopenharmony_ci{ 4197bf215546Sopenharmony_ci struct pipe_resource *res = &tex->buffer.b.b; 4198bf215546Sopenharmony_ci const struct util_format_description *desc; 4199bf215546Sopenharmony_ci unsigned char swizzle[4]; 4200bf215546Sopenharmony_ci int first_non_void; 4201bf215546Sopenharmony_ci unsigned num_format, data_format, type, num_samples; 4202bf215546Sopenharmony_ci uint64_t va; 4203bf215546Sopenharmony_ci 4204bf215546Sopenharmony_ci desc = util_format_description(pipe_format); 4205bf215546Sopenharmony_ci 4206bf215546Sopenharmony_ci num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? 
MAX2(1, res->nr_samples) 4207bf215546Sopenharmony_ci : MAX2(1, res->nr_storage_samples); 4208bf215546Sopenharmony_ci 4209bf215546Sopenharmony_ci if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 4210bf215546Sopenharmony_ci const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 4211bf215546Sopenharmony_ci const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 4212bf215546Sopenharmony_ci const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 4213bf215546Sopenharmony_ci 4214bf215546Sopenharmony_ci switch (pipe_format) { 4215bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4216bf215546Sopenharmony_ci case PIPE_FORMAT_X32_S8X24_UINT: 4217bf215546Sopenharmony_ci case PIPE_FORMAT_X8Z24_UNORM: 4218bf215546Sopenharmony_ci util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 4219bf215546Sopenharmony_ci break; 4220bf215546Sopenharmony_ci case PIPE_FORMAT_X24S8_UINT: 4221bf215546Sopenharmony_ci /* 4222bf215546Sopenharmony_ci * X24S8 is implemented as an 8_8_8_8 data format, to 4223bf215546Sopenharmony_ci * fix texture gathers. This affects at least 4224bf215546Sopenharmony_ci * GL45-CTS.texture_cube_map_array.sampling on GFX8. 
4225bf215546Sopenharmony_ci */ 4226bf215546Sopenharmony_ci if (screen->info.gfx_level <= GFX8) 4227bf215546Sopenharmony_ci util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 4228bf215546Sopenharmony_ci else 4229bf215546Sopenharmony_ci util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 4230bf215546Sopenharmony_ci break; 4231bf215546Sopenharmony_ci default: 4232bf215546Sopenharmony_ci util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 4233bf215546Sopenharmony_ci } 4234bf215546Sopenharmony_ci } else { 4235bf215546Sopenharmony_ci util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 4236bf215546Sopenharmony_ci } 4237bf215546Sopenharmony_ci 4238bf215546Sopenharmony_ci first_non_void = util_format_get_first_non_void_channel(pipe_format); 4239bf215546Sopenharmony_ci 4240bf215546Sopenharmony_ci switch (pipe_format) { 4241bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4242bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4243bf215546Sopenharmony_ci break; 4244bf215546Sopenharmony_ci default: 4245bf215546Sopenharmony_ci if (first_non_void < 0) { 4246bf215546Sopenharmony_ci if (util_format_is_compressed(pipe_format)) { 4247bf215546Sopenharmony_ci switch (pipe_format) { 4248bf215546Sopenharmony_ci case PIPE_FORMAT_DXT1_SRGB: 4249bf215546Sopenharmony_ci case PIPE_FORMAT_DXT1_SRGBA: 4250bf215546Sopenharmony_ci case PIPE_FORMAT_DXT3_SRGBA: 4251bf215546Sopenharmony_ci case PIPE_FORMAT_DXT5_SRGBA: 4252bf215546Sopenharmony_ci case PIPE_FORMAT_BPTC_SRGBA: 4253bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_SRGB8: 4254bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_SRGB8A1: 4255bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_SRGBA8: 4256bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 4257bf215546Sopenharmony_ci break; 4258bf215546Sopenharmony_ci case PIPE_FORMAT_RGTC1_SNORM: 4259bf215546Sopenharmony_ci case PIPE_FORMAT_LATC1_SNORM: 4260bf215546Sopenharmony_ci case 
PIPE_FORMAT_RGTC2_SNORM: 4261bf215546Sopenharmony_ci case PIPE_FORMAT_LATC2_SNORM: 4262bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_R11_SNORM: 4263bf215546Sopenharmony_ci case PIPE_FORMAT_ETC2_RG11_SNORM: 4264bf215546Sopenharmony_ci /* implies float, so use SNORM/UNORM to determine 4265bf215546Sopenharmony_ci whether data is signed or not */ 4266bf215546Sopenharmony_ci case PIPE_FORMAT_BPTC_RGB_FLOAT: 4267bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 4268bf215546Sopenharmony_ci break; 4269bf215546Sopenharmony_ci default: 4270bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4271bf215546Sopenharmony_ci break; 4272bf215546Sopenharmony_ci } 4273bf215546Sopenharmony_ci } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 4274bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4275bf215546Sopenharmony_ci } else { 4276bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 4277bf215546Sopenharmony_ci } 4278bf215546Sopenharmony_ci } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 4279bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 4280bf215546Sopenharmony_ci } else { 4281bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4282bf215546Sopenharmony_ci 4283bf215546Sopenharmony_ci switch (desc->channel[first_non_void].type) { 4284bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_FLOAT: 4285bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 4286bf215546Sopenharmony_ci break; 4287bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_SIGNED: 4288bf215546Sopenharmony_ci if (desc->channel[first_non_void].normalized) 4289bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 4290bf215546Sopenharmony_ci else if (desc->channel[first_non_void].pure_integer) 4291bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_SINT; 4292bf215546Sopenharmony_ci else 4293bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 
4294bf215546Sopenharmony_ci break; 4295bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_UNSIGNED: 4296bf215546Sopenharmony_ci if (desc->channel[first_non_void].normalized) 4297bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4298bf215546Sopenharmony_ci else if (desc->channel[first_non_void].pure_integer) 4299bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_UINT; 4300bf215546Sopenharmony_ci else 4301bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 4302bf215546Sopenharmony_ci } 4303bf215546Sopenharmony_ci } 4304bf215546Sopenharmony_ci } 4305bf215546Sopenharmony_ci 4306bf215546Sopenharmony_ci data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void); 4307bf215546Sopenharmony_ci if (data_format == ~0) { 4308bf215546Sopenharmony_ci data_format = 0; 4309bf215546Sopenharmony_ci } 4310bf215546Sopenharmony_ci 4311bf215546Sopenharmony_ci /* S8 with Z32 HTILE needs a special format. */ 4312bf215546Sopenharmony_ci if (screen->info.gfx_level == GFX9 && pipe_format == PIPE_FORMAT_S8_UINT) 4313bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_S8_32; 4314bf215546Sopenharmony_ci 4315bf215546Sopenharmony_ci if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY || 4316bf215546Sopenharmony_ci (screen->info.gfx_level <= GFX8 && res->target == PIPE_TEXTURE_3D))) { 4317bf215546Sopenharmony_ci /* For the purpose of shader images, treat cube maps and 3D 4318bf215546Sopenharmony_ci * textures as 2D arrays. For 3D textures, the address 4319bf215546Sopenharmony_ci * calculations for mipmaps are different, so we rely on the 4320bf215546Sopenharmony_ci * caller to effectively disable mipmaps. 
4321bf215546Sopenharmony_ci */ 4322bf215546Sopenharmony_ci type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 4323bf215546Sopenharmony_ci 4324bf215546Sopenharmony_ci assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 4325bf215546Sopenharmony_ci } else { 4326bf215546Sopenharmony_ci type = si_tex_dim(screen, tex, target, num_samples); 4327bf215546Sopenharmony_ci } 4328bf215546Sopenharmony_ci 4329bf215546Sopenharmony_ci if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 4330bf215546Sopenharmony_ci height = 1; 4331bf215546Sopenharmony_ci depth = res->array_size; 4332bf215546Sopenharmony_ci } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 4333bf215546Sopenharmony_ci if (sampler || res->target != PIPE_TEXTURE_3D) 4334bf215546Sopenharmony_ci depth = res->array_size; 4335bf215546Sopenharmony_ci } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 4336bf215546Sopenharmony_ci depth = res->array_size / 6; 4337bf215546Sopenharmony_ci 4338bf215546Sopenharmony_ci state[0] = 0; 4339bf215546Sopenharmony_ci state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format)); 4340bf215546Sopenharmony_ci state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4)); 4341bf215546Sopenharmony_ci state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 4342bf215546Sopenharmony_ci S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 4343bf215546Sopenharmony_ci S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 4344bf215546Sopenharmony_ci S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 4345bf215546Sopenharmony_ci S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) | 4346bf215546Sopenharmony_ci S_008F1C_LAST_LEVEL(num_samples > 1 ? 
util_logbase2(num_samples) : last_level) | 4347bf215546Sopenharmony_ci S_008F1C_TYPE(type)); 4348bf215546Sopenharmony_ci state[4] = 0; 4349bf215546Sopenharmony_ci state[5] = S_008F24_BASE_ARRAY(first_layer); 4350bf215546Sopenharmony_ci state[6] = 0; 4351bf215546Sopenharmony_ci state[7] = 0; 4352bf215546Sopenharmony_ci 4353bf215546Sopenharmony_ci if (screen->info.gfx_level == GFX9) { 4354bf215546Sopenharmony_ci unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle); 4355bf215546Sopenharmony_ci 4356bf215546Sopenharmony_ci /* Depth is the the last accessible layer on Gfx9. 4357bf215546Sopenharmony_ci * The hw doesn't need to know the total number of layers. 4358bf215546Sopenharmony_ci */ 4359bf215546Sopenharmony_ci if (type == V_008F1C_SQ_RSRC_IMG_3D) 4360bf215546Sopenharmony_ci state[4] |= S_008F20_DEPTH(depth - 1); 4361bf215546Sopenharmony_ci else 4362bf215546Sopenharmony_ci state[4] |= S_008F20_DEPTH(last_layer); 4363bf215546Sopenharmony_ci 4364bf215546Sopenharmony_ci state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle); 4365bf215546Sopenharmony_ci state[5] |= S_008F24_MAX_MIP(num_samples > 1 ? util_logbase2(num_samples) 4366bf215546Sopenharmony_ci : tex->buffer.b.b.last_level); 4367bf215546Sopenharmony_ci } else { 4368bf215546Sopenharmony_ci state[3] |= S_008F1C_POW2_PAD(res->last_level > 0); 4369bf215546Sopenharmony_ci state[4] |= S_008F20_DEPTH(depth - 1); 4370bf215546Sopenharmony_ci state[5] |= S_008F24_LAST_ARRAY(last_layer); 4371bf215546Sopenharmony_ci } 4372bf215546Sopenharmony_ci 4373bf215546Sopenharmony_ci if (vi_dcc_enabled(tex, first_level)) { 4374bf215546Sopenharmony_ci state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); 4375bf215546Sopenharmony_ci } else { 4376bf215546Sopenharmony_ci /* The last dword is unused by hw. The shader uses it to clear 4377bf215546Sopenharmony_ci * bits in the first dword of sampler state. 
4378bf215546Sopenharmony_ci */ 4379bf215546Sopenharmony_ci if (screen->info.gfx_level <= GFX7 && res->nr_samples <= 1) { 4380bf215546Sopenharmony_ci if (first_level == last_level) 4381bf215546Sopenharmony_ci state[7] = C_008F30_MAX_ANISO_RATIO; 4382bf215546Sopenharmony_ci else 4383bf215546Sopenharmony_ci state[7] = 0xffffffff; 4384bf215546Sopenharmony_ci } 4385bf215546Sopenharmony_ci } 4386bf215546Sopenharmony_ci 4387bf215546Sopenharmony_ci /* Initialize the sampler view for FMASK. */ 4388bf215546Sopenharmony_ci if (tex->surface.fmask_offset) { 4389bf215546Sopenharmony_ci uint32_t data_format, num_format; 4390bf215546Sopenharmony_ci 4391bf215546Sopenharmony_ci va = tex->buffer.gpu_address + tex->surface.fmask_offset; 4392bf215546Sopenharmony_ci 4393bf215546Sopenharmony_ci#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) 4394bf215546Sopenharmony_ci if (screen->info.gfx_level == GFX9) { 4395bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK; 4396bf215546Sopenharmony_ci switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 4397bf215546Sopenharmony_ci case FMASK(2, 1): 4398bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1; 4399bf215546Sopenharmony_ci break; 4400bf215546Sopenharmony_ci case FMASK(2, 2): 4401bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2; 4402bf215546Sopenharmony_ci break; 4403bf215546Sopenharmony_ci case FMASK(4, 1): 4404bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1; 4405bf215546Sopenharmony_ci break; 4406bf215546Sopenharmony_ci case FMASK(4, 2): 4407bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2; 4408bf215546Sopenharmony_ci break; 4409bf215546Sopenharmony_ci case FMASK(4, 4): 4410bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4; 4411bf215546Sopenharmony_ci break; 4412bf215546Sopenharmony_ci case FMASK(8, 1): 4413bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1; 
4414bf215546Sopenharmony_ci break; 4415bf215546Sopenharmony_ci case FMASK(8, 2): 4416bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2; 4417bf215546Sopenharmony_ci break; 4418bf215546Sopenharmony_ci case FMASK(8, 4): 4419bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4; 4420bf215546Sopenharmony_ci break; 4421bf215546Sopenharmony_ci case FMASK(8, 8): 4422bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8; 4423bf215546Sopenharmony_ci break; 4424bf215546Sopenharmony_ci case FMASK(16, 1): 4425bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1; 4426bf215546Sopenharmony_ci break; 4427bf215546Sopenharmony_ci case FMASK(16, 2): 4428bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2; 4429bf215546Sopenharmony_ci break; 4430bf215546Sopenharmony_ci case FMASK(16, 4): 4431bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4; 4432bf215546Sopenharmony_ci break; 4433bf215546Sopenharmony_ci case FMASK(16, 8): 4434bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8; 4435bf215546Sopenharmony_ci break; 4436bf215546Sopenharmony_ci default: 4437bf215546Sopenharmony_ci unreachable("invalid nr_samples"); 4438bf215546Sopenharmony_ci } 4439bf215546Sopenharmony_ci } else { 4440bf215546Sopenharmony_ci switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 4441bf215546Sopenharmony_ci case FMASK(2, 1): 4442bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1; 4443bf215546Sopenharmony_ci break; 4444bf215546Sopenharmony_ci case FMASK(2, 2): 4445bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 4446bf215546Sopenharmony_ci break; 4447bf215546Sopenharmony_ci case FMASK(4, 1): 4448bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1; 4449bf215546Sopenharmony_ci break; 4450bf215546Sopenharmony_ci case FMASK(4, 2): 4451bf215546Sopenharmony_ci data_format = 
V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2; 4452bf215546Sopenharmony_ci break; 4453bf215546Sopenharmony_ci case FMASK(4, 4): 4454bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 4455bf215546Sopenharmony_ci break; 4456bf215546Sopenharmony_ci case FMASK(8, 1): 4457bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1; 4458bf215546Sopenharmony_ci break; 4459bf215546Sopenharmony_ci case FMASK(8, 2): 4460bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2; 4461bf215546Sopenharmony_ci break; 4462bf215546Sopenharmony_ci case FMASK(8, 4): 4463bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4; 4464bf215546Sopenharmony_ci break; 4465bf215546Sopenharmony_ci case FMASK(8, 8): 4466bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 4467bf215546Sopenharmony_ci break; 4468bf215546Sopenharmony_ci case FMASK(16, 1): 4469bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1; 4470bf215546Sopenharmony_ci break; 4471bf215546Sopenharmony_ci case FMASK(16, 2): 4472bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2; 4473bf215546Sopenharmony_ci break; 4474bf215546Sopenharmony_ci case FMASK(16, 4): 4475bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4; 4476bf215546Sopenharmony_ci break; 4477bf215546Sopenharmony_ci case FMASK(16, 8): 4478bf215546Sopenharmony_ci data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8; 4479bf215546Sopenharmony_ci break; 4480bf215546Sopenharmony_ci default: 4481bf215546Sopenharmony_ci unreachable("invalid nr_samples"); 4482bf215546Sopenharmony_ci } 4483bf215546Sopenharmony_ci num_format = V_008F14_IMG_NUM_FORMAT_UINT; 4484bf215546Sopenharmony_ci } 4485bf215546Sopenharmony_ci#undef FMASK 4486bf215546Sopenharmony_ci 4487bf215546Sopenharmony_ci fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle; 4488bf215546Sopenharmony_ci fmask_state[1] = 
S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(data_format) | 4489bf215546Sopenharmony_ci S_008F14_NUM_FORMAT(num_format); 4490bf215546Sopenharmony_ci fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1); 4491bf215546Sopenharmony_ci fmask_state[3] = 4492bf215546Sopenharmony_ci S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 4493bf215546Sopenharmony_ci S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 4494bf215546Sopenharmony_ci S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0)); 4495bf215546Sopenharmony_ci fmask_state[4] = 0; 4496bf215546Sopenharmony_ci fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); 4497bf215546Sopenharmony_ci fmask_state[6] = 0; 4498bf215546Sopenharmony_ci fmask_state[7] = 0; 4499bf215546Sopenharmony_ci 4500bf215546Sopenharmony_ci if (screen->info.gfx_level == GFX9) { 4501bf215546Sopenharmony_ci fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode); 4502bf215546Sopenharmony_ci fmask_state[4] |= 4503bf215546Sopenharmony_ci S_008F20_DEPTH(last_layer) | S_008F20_PITCH(tex->surface.u.gfx9.color.fmask_epitch); 4504bf215546Sopenharmony_ci fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | 4505bf215546Sopenharmony_ci S_008F24_META_RB_ALIGNED(1); 4506bf215546Sopenharmony_ci } else { 4507bf215546Sopenharmony_ci fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.color.fmask.tiling_index); 4508bf215546Sopenharmony_ci fmask_state[4] |= S_008F20_DEPTH(depth - 1) | 4509bf215546Sopenharmony_ci S_008F20_PITCH(tex->surface.u.legacy.color.fmask.pitch_in_pixels - 1); 4510bf215546Sopenharmony_ci fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); 4511bf215546Sopenharmony_ci } 4512bf215546Sopenharmony_ci } 4513bf215546Sopenharmony_ci} 4514bf215546Sopenharmony_ci 4515bf215546Sopenharmony_ci/** 4516bf215546Sopenharmony_ci * Create a sampler view. 
4517bf215546Sopenharmony_ci * 4518bf215546Sopenharmony_ci * @param ctx context 4519bf215546Sopenharmony_ci * @param texture texture 4520bf215546Sopenharmony_ci * @param state sampler view template 4521bf215546Sopenharmony_ci */ 4522bf215546Sopenharmony_cistatic struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx, 4523bf215546Sopenharmony_ci struct pipe_resource *texture, 4524bf215546Sopenharmony_ci const struct pipe_sampler_view *state) 4525bf215546Sopenharmony_ci{ 4526bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 4527bf215546Sopenharmony_ci struct si_sampler_view *view = CALLOC_STRUCT_CL(si_sampler_view); 4528bf215546Sopenharmony_ci struct si_texture *tex = (struct si_texture *)texture; 4529bf215546Sopenharmony_ci unsigned char state_swizzle[4]; 4530bf215546Sopenharmony_ci unsigned last_layer = state->u.tex.last_layer; 4531bf215546Sopenharmony_ci enum pipe_format pipe_format; 4532bf215546Sopenharmony_ci const struct legacy_surf_level *surflevel; 4533bf215546Sopenharmony_ci 4534bf215546Sopenharmony_ci if (!view) 4535bf215546Sopenharmony_ci return NULL; 4536bf215546Sopenharmony_ci 4537bf215546Sopenharmony_ci /* initialize base object */ 4538bf215546Sopenharmony_ci view->base = *state; 4539bf215546Sopenharmony_ci view->base.texture = NULL; 4540bf215546Sopenharmony_ci view->base.reference.count = 1; 4541bf215546Sopenharmony_ci view->base.context = ctx; 4542bf215546Sopenharmony_ci 4543bf215546Sopenharmony_ci assert(texture); 4544bf215546Sopenharmony_ci pipe_resource_reference(&view->base.texture, texture); 4545bf215546Sopenharmony_ci 4546bf215546Sopenharmony_ci if (state->format == PIPE_FORMAT_X24S8_UINT || state->format == PIPE_FORMAT_S8X24_UINT || 4547bf215546Sopenharmony_ci state->format == PIPE_FORMAT_X32_S8X24_UINT || state->format == PIPE_FORMAT_S8_UINT) 4548bf215546Sopenharmony_ci view->is_stencil_sampler = true; 4549bf215546Sopenharmony_ci 4550bf215546Sopenharmony_ci /* Buffer resource. 
*/ 4551bf215546Sopenharmony_ci if (texture->target == PIPE_BUFFER) { 4552bf215546Sopenharmony_ci uint32_t elements = si_clamp_texture_texel_count(sctx->screen->max_texel_buffer_elements, 4553bf215546Sopenharmony_ci state->format, state->u.buf.size); 4554bf215546Sopenharmony_ci 4555bf215546Sopenharmony_ci si_make_buffer_descriptor(sctx->screen, si_resource(texture), state->format, 4556bf215546Sopenharmony_ci state->u.buf.offset, elements, view->state); 4557bf215546Sopenharmony_ci return &view->base; 4558bf215546Sopenharmony_ci } 4559bf215546Sopenharmony_ci 4560bf215546Sopenharmony_ci state_swizzle[0] = state->swizzle_r; 4561bf215546Sopenharmony_ci state_swizzle[1] = state->swizzle_g; 4562bf215546Sopenharmony_ci state_swizzle[2] = state->swizzle_b; 4563bf215546Sopenharmony_ci state_swizzle[3] = state->swizzle_a; 4564bf215546Sopenharmony_ci 4565bf215546Sopenharmony_ci /* This is not needed if gallium frontends set last_layer correctly. */ 4566bf215546Sopenharmony_ci if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D || 4567bf215546Sopenharmony_ci state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE) 4568bf215546Sopenharmony_ci last_layer = state->u.tex.first_layer; 4569bf215546Sopenharmony_ci 4570bf215546Sopenharmony_ci /* Texturing with separate depth and stencil. */ 4571bf215546Sopenharmony_ci pipe_format = state->format; 4572bf215546Sopenharmony_ci 4573bf215546Sopenharmony_ci /* Depth/stencil texturing sometimes needs separate texture. 
*/ 4574bf215546Sopenharmony_ci if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) { 4575bf215546Sopenharmony_ci if (!tex->flushed_depth_texture && !si_init_flushed_depth_texture(ctx, texture)) { 4576bf215546Sopenharmony_ci pipe_resource_reference(&view->base.texture, NULL); 4577bf215546Sopenharmony_ci FREE(view); 4578bf215546Sopenharmony_ci return NULL; 4579bf215546Sopenharmony_ci } 4580bf215546Sopenharmony_ci 4581bf215546Sopenharmony_ci assert(tex->flushed_depth_texture); 4582bf215546Sopenharmony_ci 4583bf215546Sopenharmony_ci /* Override format for the case where the flushed texture 4584bf215546Sopenharmony_ci * contains only Z or only S. 4585bf215546Sopenharmony_ci */ 4586bf215546Sopenharmony_ci if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format) 4587bf215546Sopenharmony_ci pipe_format = tex->flushed_depth_texture->buffer.b.b.format; 4588bf215546Sopenharmony_ci 4589bf215546Sopenharmony_ci tex = tex->flushed_depth_texture; 4590bf215546Sopenharmony_ci } 4591bf215546Sopenharmony_ci 4592bf215546Sopenharmony_ci surflevel = tex->surface.u.legacy.level; 4593bf215546Sopenharmony_ci 4594bf215546Sopenharmony_ci if (tex->db_compatible) { 4595bf215546Sopenharmony_ci if (!view->is_stencil_sampler) 4596bf215546Sopenharmony_ci pipe_format = tex->db_render_format; 4597bf215546Sopenharmony_ci 4598bf215546Sopenharmony_ci switch (pipe_format) { 4599bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 4600bf215546Sopenharmony_ci pipe_format = PIPE_FORMAT_Z32_FLOAT; 4601bf215546Sopenharmony_ci break; 4602bf215546Sopenharmony_ci case PIPE_FORMAT_X8Z24_UNORM: 4603bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4604bf215546Sopenharmony_ci /* Z24 is always stored like this for DB 4605bf215546Sopenharmony_ci * compatibility. 
4606bf215546Sopenharmony_ci */ 4607bf215546Sopenharmony_ci pipe_format = PIPE_FORMAT_Z24X8_UNORM; 4608bf215546Sopenharmony_ci break; 4609bf215546Sopenharmony_ci case PIPE_FORMAT_X24S8_UINT: 4610bf215546Sopenharmony_ci case PIPE_FORMAT_S8X24_UINT: 4611bf215546Sopenharmony_ci case PIPE_FORMAT_X32_S8X24_UINT: 4612bf215546Sopenharmony_ci pipe_format = PIPE_FORMAT_S8_UINT; 4613bf215546Sopenharmony_ci surflevel = tex->surface.u.legacy.zs.stencil_level; 4614bf215546Sopenharmony_ci break; 4615bf215546Sopenharmony_ci default:; 4616bf215546Sopenharmony_ci } 4617bf215546Sopenharmony_ci } 4618bf215546Sopenharmony_ci 4619bf215546Sopenharmony_ci view->dcc_incompatible = 4620bf215546Sopenharmony_ci vi_dcc_formats_are_incompatible(texture, state->u.tex.first_level, state->format); 4621bf215546Sopenharmony_ci 4622bf215546Sopenharmony_ci sctx->screen->make_texture_descriptor( 4623bf215546Sopenharmony_ci sctx->screen, tex, true, state->target, pipe_format, state_swizzle, 4624bf215546Sopenharmony_ci state->u.tex.first_level, state->u.tex.last_level, 4625bf215546Sopenharmony_ci state->u.tex.first_layer, last_layer, texture->width0, texture->height0, texture->depth0, 4626bf215546Sopenharmony_ci view->state, view->fmask_state); 4627bf215546Sopenharmony_ci 4628bf215546Sopenharmony_ci view->base_level_info = &surflevel[0]; 4629bf215546Sopenharmony_ci view->block_width = util_format_get_blockwidth(pipe_format); 4630bf215546Sopenharmony_ci return &view->base; 4631bf215546Sopenharmony_ci} 4632bf215546Sopenharmony_ci 4633bf215546Sopenharmony_cistatic void si_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) 4634bf215546Sopenharmony_ci{ 4635bf215546Sopenharmony_ci struct si_sampler_view *view = (struct si_sampler_view *)state; 4636bf215546Sopenharmony_ci 4637bf215546Sopenharmony_ci pipe_resource_reference(&state->texture, NULL); 4638bf215546Sopenharmony_ci FREE_CL(view); 4639bf215546Sopenharmony_ci} 4640bf215546Sopenharmony_ci 4641bf215546Sopenharmony_cistatic bool 
wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 4642bf215546Sopenharmony_ci{ 4643bf215546Sopenharmony_ci return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 4644bf215546Sopenharmony_ci (linear_filter && (wrap == PIPE_TEX_WRAP_CLAMP || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 4645bf215546Sopenharmony_ci} 4646bf215546Sopenharmony_ci 4647bf215546Sopenharmony_cistatic uint32_t si_translate_border_color(struct si_context *sctx, 4648bf215546Sopenharmony_ci const struct pipe_sampler_state *state, 4649bf215546Sopenharmony_ci const union pipe_color_union *color, bool is_integer) 4650bf215546Sopenharmony_ci{ 4651bf215546Sopenharmony_ci bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 4652bf215546Sopenharmony_ci state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 4653bf215546Sopenharmony_ci 4654bf215546Sopenharmony_ci if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) && 4655bf215546Sopenharmony_ci !wrap_mode_uses_border_color(state->wrap_t, linear_filter) && 4656bf215546Sopenharmony_ci !wrap_mode_uses_border_color(state->wrap_r, linear_filter)) 4657bf215546Sopenharmony_ci return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4658bf215546Sopenharmony_ci 4659bf215546Sopenharmony_ci#define simple_border_types(elt) \ 4660bf215546Sopenharmony_ci do { \ 4661bf215546Sopenharmony_ci if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 0) \ 4662bf215546Sopenharmony_ci return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \ 4663bf215546Sopenharmony_ci if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 1) \ 4664bf215546Sopenharmony_ci return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \ 4665bf215546Sopenharmony_ci if (color->elt[0] == 1 && color->elt[1] == 1 && color->elt[2] == 1 && color->elt[3] == 1) \ 4666bf215546Sopenharmony_ci return 
S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \ 4667bf215546Sopenharmony_ci } while (false) 4668bf215546Sopenharmony_ci 4669bf215546Sopenharmony_ci if (is_integer) 4670bf215546Sopenharmony_ci simple_border_types(ui); 4671bf215546Sopenharmony_ci else 4672bf215546Sopenharmony_ci simple_border_types(f); 4673bf215546Sopenharmony_ci 4674bf215546Sopenharmony_ci#undef simple_border_types 4675bf215546Sopenharmony_ci 4676bf215546Sopenharmony_ci int i; 4677bf215546Sopenharmony_ci 4678bf215546Sopenharmony_ci /* Check if the border has been uploaded already. */ 4679bf215546Sopenharmony_ci for (i = 0; i < sctx->border_color_count; i++) 4680bf215546Sopenharmony_ci if (memcmp(&sctx->border_color_table[i], color, sizeof(*color)) == 0) 4681bf215546Sopenharmony_ci break; 4682bf215546Sopenharmony_ci 4683bf215546Sopenharmony_ci if (i >= SI_MAX_BORDER_COLORS) { 4684bf215546Sopenharmony_ci /* Getting 4096 unique border colors is very unlikely. */ 4685bf215546Sopenharmony_ci static bool printed; 4686bf215546Sopenharmony_ci if (!printed) { 4687bf215546Sopenharmony_ci fprintf(stderr, "radeonsi: The border color table is full. " 4688bf215546Sopenharmony_ci "Any new border colors will be just black. " 4689bf215546Sopenharmony_ci "This is a hardware limitation.\n"); 4690bf215546Sopenharmony_ci printed = true; 4691bf215546Sopenharmony_ci } 4692bf215546Sopenharmony_ci return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4693bf215546Sopenharmony_ci } 4694bf215546Sopenharmony_ci 4695bf215546Sopenharmony_ci if (i == sctx->border_color_count) { 4696bf215546Sopenharmony_ci /* Upload a new border color. 
*/ 4697bf215546Sopenharmony_ci memcpy(&sctx->border_color_table[i], color, sizeof(*color)); 4698bf215546Sopenharmony_ci util_memcpy_cpu_to_le32(&sctx->border_color_map[i], color, sizeof(*color)); 4699bf215546Sopenharmony_ci sctx->border_color_count++; 4700bf215546Sopenharmony_ci } 4701bf215546Sopenharmony_ci 4702bf215546Sopenharmony_ci return (sctx->screen->info.gfx_level >= GFX11 ? S_008F3C_BORDER_COLOR_PTR_GFX11(i): 4703bf215546Sopenharmony_ci S_008F3C_BORDER_COLOR_PTR_GFX6(i)) | 4704bf215546Sopenharmony_ci S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER); 4705bf215546Sopenharmony_ci} 4706bf215546Sopenharmony_ci 4707bf215546Sopenharmony_cistatic inline int S_FIXED(float value, unsigned frac_bits) 4708bf215546Sopenharmony_ci{ 4709bf215546Sopenharmony_ci return value * (1 << frac_bits); 4710bf215546Sopenharmony_ci} 4711bf215546Sopenharmony_ci 4712bf215546Sopenharmony_cistatic inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso) 4713bf215546Sopenharmony_ci{ 4714bf215546Sopenharmony_ci if (filter == PIPE_TEX_FILTER_LINEAR) 4715bf215546Sopenharmony_ci return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR 4716bf215546Sopenharmony_ci : V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 4717bf215546Sopenharmony_ci else 4718bf215546Sopenharmony_ci return max_aniso > 1 ? 
V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT 4719bf215546Sopenharmony_ci : V_008F38_SQ_TEX_XY_FILTER_POINT; 4720bf215546Sopenharmony_ci} 4721bf215546Sopenharmony_ci 4722bf215546Sopenharmony_cistatic inline unsigned si_tex_aniso_filter(unsigned filter) 4723bf215546Sopenharmony_ci{ 4724bf215546Sopenharmony_ci if (filter < 2) 4725bf215546Sopenharmony_ci return 0; 4726bf215546Sopenharmony_ci if (filter < 4) 4727bf215546Sopenharmony_ci return 1; 4728bf215546Sopenharmony_ci if (filter < 8) 4729bf215546Sopenharmony_ci return 2; 4730bf215546Sopenharmony_ci if (filter < 16) 4731bf215546Sopenharmony_ci return 3; 4732bf215546Sopenharmony_ci return 4; 4733bf215546Sopenharmony_ci} 4734bf215546Sopenharmony_ci 4735bf215546Sopenharmony_cistatic void *si_create_sampler_state(struct pipe_context *ctx, 4736bf215546Sopenharmony_ci const struct pipe_sampler_state *state) 4737bf215546Sopenharmony_ci{ 4738bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 4739bf215546Sopenharmony_ci struct si_screen *sscreen = sctx->screen; 4740bf215546Sopenharmony_ci struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 4741bf215546Sopenharmony_ci unsigned max_aniso = sscreen->force_aniso >= 0 ? sscreen->force_aniso : state->max_anisotropy; 4742bf215546Sopenharmony_ci unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso); 4743bf215546Sopenharmony_ci bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST && 4744bf215546Sopenharmony_ci state->mag_img_filter == PIPE_TEX_FILTER_NEAREST && 4745bf215546Sopenharmony_ci state->compare_mode == PIPE_TEX_COMPARE_NONE; 4746bf215546Sopenharmony_ci union pipe_color_union clamped_border_color; 4747bf215546Sopenharmony_ci 4748bf215546Sopenharmony_ci if (!rstate) { 4749bf215546Sopenharmony_ci return NULL; 4750bf215546Sopenharmony_ci } 4751bf215546Sopenharmony_ci 4752bf215546Sopenharmony_ci /* Validate inputs. 
*/ 4753bf215546Sopenharmony_ci if (!is_wrap_mode_legal(sscreen, state->wrap_s) || 4754bf215546Sopenharmony_ci !is_wrap_mode_legal(sscreen, state->wrap_t) || 4755bf215546Sopenharmony_ci !is_wrap_mode_legal(sscreen, state->wrap_r) || 4756bf215546Sopenharmony_ci (!sscreen->info.has_3d_cube_border_color_mipmap && 4757bf215546Sopenharmony_ci (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE || 4758bf215546Sopenharmony_ci state->max_anisotropy > 0))) { 4759bf215546Sopenharmony_ci assert(0); 4760bf215546Sopenharmony_ci return NULL; 4761bf215546Sopenharmony_ci } 4762bf215546Sopenharmony_ci 4763bf215546Sopenharmony_ci#ifndef NDEBUG 4764bf215546Sopenharmony_ci rstate->magic = SI_SAMPLER_STATE_MAGIC; 4765bf215546Sopenharmony_ci#endif 4766bf215546Sopenharmony_ci rstate->val[0] = 4767bf215546Sopenharmony_ci (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 4768bf215546Sopenharmony_ci S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 4769bf215546Sopenharmony_ci S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 4770bf215546Sopenharmony_ci S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 4771bf215546Sopenharmony_ci S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | 4772bf215546Sopenharmony_ci S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 4773bf215546Sopenharmony_ci S_008F30_TRUNC_COORD(trunc_coord) | 4774bf215546Sopenharmony_ci S_008F30_COMPAT_MODE(sctx->gfx_level == GFX8 || sctx->gfx_level == GFX9)); 4775bf215546Sopenharmony_ci rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 4776bf215546Sopenharmony_ci S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | 4777bf215546Sopenharmony_ci S_008F34_PERF_MIP(max_aniso_ratio ? 
max_aniso_ratio + 6 : 0)); 4778bf215546Sopenharmony_ci rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 4779bf215546Sopenharmony_ci S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) | 4780bf215546Sopenharmony_ci S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) | 4781bf215546Sopenharmony_ci S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); 4782bf215546Sopenharmony_ci rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, 4783bf215546Sopenharmony_ci state->border_color_is_integer); 4784bf215546Sopenharmony_ci 4785bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX10) { 4786bf215546Sopenharmony_ci rstate->val[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1); 4787bf215546Sopenharmony_ci } else { 4788bf215546Sopenharmony_ci rstate->val[2] |= S_008F38_DISABLE_LSB_CEIL(sctx->gfx_level <= GFX8) | 4789bf215546Sopenharmony_ci S_008F38_FILTER_PREC_FIX(1) | 4790bf215546Sopenharmony_ci S_008F38_ANISO_OVERRIDE_GFX8(sctx->gfx_level >= GFX8); 4791bf215546Sopenharmony_ci } 4792bf215546Sopenharmony_ci 4793bf215546Sopenharmony_ci /* Create sampler resource for upgraded depth textures. */ 4794bf215546Sopenharmony_ci memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val)); 4795bf215546Sopenharmony_ci 4796bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; ++i) { 4797bf215546Sopenharmony_ci /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE 4798bf215546Sopenharmony_ci * when the border color is 1.0. 
*/ 4799bf215546Sopenharmony_ci clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1); 4800bf215546Sopenharmony_ci } 4801bf215546Sopenharmony_ci 4802bf215546Sopenharmony_ci if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0) { 4803bf215546Sopenharmony_ci if (sscreen->info.gfx_level <= GFX9) 4804bf215546Sopenharmony_ci rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1); 4805bf215546Sopenharmony_ci } else { 4806bf215546Sopenharmony_ci rstate->upgraded_depth_val[3] = 4807bf215546Sopenharmony_ci si_translate_border_color(sctx, state, &clamped_border_color, false); 4808bf215546Sopenharmony_ci } 4809bf215546Sopenharmony_ci 4810bf215546Sopenharmony_ci return rstate; 4811bf215546Sopenharmony_ci} 4812bf215546Sopenharmony_ci 4813bf215546Sopenharmony_cistatic void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 4814bf215546Sopenharmony_ci{ 4815bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 4816bf215546Sopenharmony_ci 4817bf215546Sopenharmony_ci if (sctx->sample_mask == (uint16_t)sample_mask) 4818bf215546Sopenharmony_ci return; 4819bf215546Sopenharmony_ci 4820bf215546Sopenharmony_ci sctx->sample_mask = sample_mask; 4821bf215546Sopenharmony_ci si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask); 4822bf215546Sopenharmony_ci} 4823bf215546Sopenharmony_ci 4824bf215546Sopenharmony_cistatic void si_emit_sample_mask(struct si_context *sctx) 4825bf215546Sopenharmony_ci{ 4826bf215546Sopenharmony_ci struct radeon_cmdbuf *cs = &sctx->gfx_cs; 4827bf215546Sopenharmony_ci unsigned mask = sctx->sample_mask; 4828bf215546Sopenharmony_ci 4829bf215546Sopenharmony_ci /* Needed for line and polygon smoothing as well as for the Polaris 4830bf215546Sopenharmony_ci * small primitive filter. We expect the gallium frontend to take care of 4831bf215546Sopenharmony_ci * this for us. 
4832bf215546Sopenharmony_ci */ 4833bf215546Sopenharmony_ci assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 4834bf215546Sopenharmony_ci (mask & 1 && sctx->blitter_running)); 4835bf215546Sopenharmony_ci 4836bf215546Sopenharmony_ci radeon_begin(cs); 4837bf215546Sopenharmony_ci radeon_set_context_reg_seq(R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 4838bf215546Sopenharmony_ci radeon_emit(mask | (mask << 16)); 4839bf215546Sopenharmony_ci radeon_emit(mask | (mask << 16)); 4840bf215546Sopenharmony_ci radeon_end(); 4841bf215546Sopenharmony_ci} 4842bf215546Sopenharmony_ci 4843bf215546Sopenharmony_cistatic void si_delete_sampler_state(struct pipe_context *ctx, void *state) 4844bf215546Sopenharmony_ci{ 4845bf215546Sopenharmony_ci#ifndef NDEBUG 4846bf215546Sopenharmony_ci struct si_sampler_state *s = state; 4847bf215546Sopenharmony_ci 4848bf215546Sopenharmony_ci assert(s->magic == SI_SAMPLER_STATE_MAGIC); 4849bf215546Sopenharmony_ci s->magic = 0; 4850bf215546Sopenharmony_ci#endif 4851bf215546Sopenharmony_ci free(state); 4852bf215546Sopenharmony_ci} 4853bf215546Sopenharmony_ci 4854bf215546Sopenharmony_ci/* 4855bf215546Sopenharmony_ci * Vertex elements & buffers 4856bf215546Sopenharmony_ci */ 4857bf215546Sopenharmony_ci 4858bf215546Sopenharmony_cistruct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_bits) 4859bf215546Sopenharmony_ci{ 4860bf215546Sopenharmony_ci struct util_fast_udiv_info info = util_compute_fast_udiv_info(D, num_bits, 32); 4861bf215546Sopenharmony_ci 4862bf215546Sopenharmony_ci struct si_fast_udiv_info32 result = { 4863bf215546Sopenharmony_ci info.multiplier, 4864bf215546Sopenharmony_ci info.pre_shift, 4865bf215546Sopenharmony_ci info.post_shift, 4866bf215546Sopenharmony_ci info.increment, 4867bf215546Sopenharmony_ci }; 4868bf215546Sopenharmony_ci return result; 4869bf215546Sopenharmony_ci} 4870bf215546Sopenharmony_ci 4871bf215546Sopenharmony_cistatic void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count, 
4872bf215546Sopenharmony_ci const struct pipe_vertex_element *elements) 4873bf215546Sopenharmony_ci{ 4874bf215546Sopenharmony_ci struct si_screen *sscreen = (struct si_screen *)ctx->screen; 4875bf215546Sopenharmony_ci struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements); 4876bf215546Sopenharmony_ci bool used[SI_NUM_VERTEX_BUFFERS] = {}; 4877bf215546Sopenharmony_ci struct si_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {}; 4878bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(struct si_fast_udiv_info32) == 16); 4879bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4); 4880bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4); 4881bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4); 4882bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4); 4883bf215546Sopenharmony_ci int i; 4884bf215546Sopenharmony_ci 4885bf215546Sopenharmony_ci assert(count <= SI_MAX_ATTRIBS); 4886bf215546Sopenharmony_ci if (!v) 4887bf215546Sopenharmony_ci return NULL; 4888bf215546Sopenharmony_ci 4889bf215546Sopenharmony_ci v->count = count; 4890bf215546Sopenharmony_ci 4891bf215546Sopenharmony_ci unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sscreen); 4892bf215546Sopenharmony_ci unsigned alloc_count = 4893bf215546Sopenharmony_ci count > num_vbos_in_user_sgprs ? 
count - num_vbos_in_user_sgprs : 0; 4894bf215546Sopenharmony_ci v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT); 4895bf215546Sopenharmony_ci 4896bf215546Sopenharmony_ci for (i = 0; i < count; ++i) { 4897bf215546Sopenharmony_ci const struct util_format_description *desc; 4898bf215546Sopenharmony_ci const struct util_format_channel_description *channel; 4899bf215546Sopenharmony_ci int first_non_void; 4900bf215546Sopenharmony_ci unsigned vbo_index = elements[i].vertex_buffer_index; 4901bf215546Sopenharmony_ci 4902bf215546Sopenharmony_ci if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { 4903bf215546Sopenharmony_ci FREE(v); 4904bf215546Sopenharmony_ci return NULL; 4905bf215546Sopenharmony_ci } 4906bf215546Sopenharmony_ci 4907bf215546Sopenharmony_ci unsigned instance_divisor = elements[i].instance_divisor; 4908bf215546Sopenharmony_ci if (instance_divisor) { 4909bf215546Sopenharmony_ci if (instance_divisor == 1) { 4910bf215546Sopenharmony_ci v->instance_divisor_is_one |= 1u << i; 4911bf215546Sopenharmony_ci } else { 4912bf215546Sopenharmony_ci v->instance_divisor_is_fetched |= 1u << i; 4913bf215546Sopenharmony_ci divisor_factors[i] = si_compute_fast_udiv_info32(instance_divisor, 32); 4914bf215546Sopenharmony_ci } 4915bf215546Sopenharmony_ci } 4916bf215546Sopenharmony_ci 4917bf215546Sopenharmony_ci if (!used[vbo_index]) { 4918bf215546Sopenharmony_ci v->first_vb_use_mask |= 1 << i; 4919bf215546Sopenharmony_ci used[vbo_index] = true; 4920bf215546Sopenharmony_ci } 4921bf215546Sopenharmony_ci 4922bf215546Sopenharmony_ci desc = util_format_description(elements[i].src_format); 4923bf215546Sopenharmony_ci first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 4924bf215546Sopenharmony_ci channel = first_non_void >= 0 ? 
&desc->channel[first_non_void] : NULL; 4925bf215546Sopenharmony_ci 4926bf215546Sopenharmony_ci v->format_size[i] = desc->block.bits / 8; 4927bf215546Sopenharmony_ci v->src_offset[i] = elements[i].src_offset; 4928bf215546Sopenharmony_ci v->vertex_buffer_index[i] = vbo_index; 4929bf215546Sopenharmony_ci 4930bf215546Sopenharmony_ci bool always_fix = false; 4931bf215546Sopenharmony_ci union si_vs_fix_fetch fix_fetch; 4932bf215546Sopenharmony_ci unsigned log_hw_load_size; /* the load element size as seen by the hardware */ 4933bf215546Sopenharmony_ci 4934bf215546Sopenharmony_ci fix_fetch.bits = 0; 4935bf215546Sopenharmony_ci log_hw_load_size = MIN2(2, util_logbase2(desc->block.bits) - 3); 4936bf215546Sopenharmony_ci 4937bf215546Sopenharmony_ci if (channel) { 4938bf215546Sopenharmony_ci switch (channel->type) { 4939bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_FLOAT: 4940bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT; 4941bf215546Sopenharmony_ci break; 4942bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_FIXED: 4943bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_FIXED; 4944bf215546Sopenharmony_ci break; 4945bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_SIGNED: { 4946bf215546Sopenharmony_ci if (channel->pure_integer) 4947bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_SINT; 4948bf215546Sopenharmony_ci else if (channel->normalized) 4949bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_SNORM; 4950bf215546Sopenharmony_ci else 4951bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_SSCALED; 4952bf215546Sopenharmony_ci break; 4953bf215546Sopenharmony_ci } 4954bf215546Sopenharmony_ci case UTIL_FORMAT_TYPE_UNSIGNED: { 4955bf215546Sopenharmony_ci if (channel->pure_integer) 4956bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_UINT; 4957bf215546Sopenharmony_ci else if (channel->normalized) 4958bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_UNORM; 4959bf215546Sopenharmony_ci else 
4960bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_USCALED; 4961bf215546Sopenharmony_ci break; 4962bf215546Sopenharmony_ci } 4963bf215546Sopenharmony_ci default: 4964bf215546Sopenharmony_ci unreachable("bad format type"); 4965bf215546Sopenharmony_ci } 4966bf215546Sopenharmony_ci } else { 4967bf215546Sopenharmony_ci switch (elements[i].src_format) { 4968bf215546Sopenharmony_ci case PIPE_FORMAT_R11G11B10_FLOAT: 4969bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT; 4970bf215546Sopenharmony_ci break; 4971bf215546Sopenharmony_ci default: 4972bf215546Sopenharmony_ci unreachable("bad other format"); 4973bf215546Sopenharmony_ci } 4974bf215546Sopenharmony_ci } 4975bf215546Sopenharmony_ci 4976bf215546Sopenharmony_ci if (desc->channel[0].size == 10) { 4977bf215546Sopenharmony_ci fix_fetch.u.log_size = 3; /* special encoding for 2_10_10_10 */ 4978bf215546Sopenharmony_ci log_hw_load_size = 2; 4979bf215546Sopenharmony_ci 4980bf215546Sopenharmony_ci /* The hardware always treats the 2-bit alpha channel as 4981bf215546Sopenharmony_ci * unsigned, so a shader workaround is needed. The affected 4982bf215546Sopenharmony_ci * chips are GFX8 and older except Stoney (GFX8.1). 
4983bf215546Sopenharmony_ci */ 4984bf215546Sopenharmony_ci always_fix = sscreen->info.gfx_level <= GFX8 && sscreen->info.family != CHIP_STONEY && 4985bf215546Sopenharmony_ci channel->type == UTIL_FORMAT_TYPE_SIGNED; 4986bf215546Sopenharmony_ci } else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) { 4987bf215546Sopenharmony_ci fix_fetch.u.log_size = 3; /* special encoding */ 4988bf215546Sopenharmony_ci fix_fetch.u.format = AC_FETCH_FORMAT_FIXED; 4989bf215546Sopenharmony_ci log_hw_load_size = 2; 4990bf215546Sopenharmony_ci } else { 4991bf215546Sopenharmony_ci fix_fetch.u.log_size = util_logbase2(channel->size) - 3; 4992bf215546Sopenharmony_ci fix_fetch.u.num_channels_m1 = desc->nr_channels - 1; 4993bf215546Sopenharmony_ci 4994bf215546Sopenharmony_ci /* Always fix up: 4995bf215546Sopenharmony_ci * - doubles (multiple loads + truncate to float) 4996bf215546Sopenharmony_ci * - 32-bit requiring a conversion 4997bf215546Sopenharmony_ci */ 4998bf215546Sopenharmony_ci always_fix = (fix_fetch.u.log_size == 3) || 4999bf215546Sopenharmony_ci (fix_fetch.u.log_size == 2 && fix_fetch.u.format != AC_FETCH_FORMAT_FLOAT && 5000bf215546Sopenharmony_ci fix_fetch.u.format != AC_FETCH_FORMAT_UINT && 5001bf215546Sopenharmony_ci fix_fetch.u.format != AC_FETCH_FORMAT_SINT); 5002bf215546Sopenharmony_ci 5003bf215546Sopenharmony_ci /* Also fixup 8_8_8 and 16_16_16. 
*/ 5004bf215546Sopenharmony_ci if (desc->nr_channels == 3 && fix_fetch.u.log_size <= 1) { 5005bf215546Sopenharmony_ci always_fix = true; 5006bf215546Sopenharmony_ci log_hw_load_size = fix_fetch.u.log_size; 5007bf215546Sopenharmony_ci } 5008bf215546Sopenharmony_ci } 5009bf215546Sopenharmony_ci 5010bf215546Sopenharmony_ci if (desc->swizzle[0] != PIPE_SWIZZLE_X) { 5011bf215546Sopenharmony_ci assert(desc->swizzle[0] == PIPE_SWIZZLE_Z && 5012bf215546Sopenharmony_ci (desc->swizzle[2] == PIPE_SWIZZLE_X || desc->swizzle[2] == PIPE_SWIZZLE_0)); 5013bf215546Sopenharmony_ci fix_fetch.u.reverse = 1; 5014bf215546Sopenharmony_ci } 5015bf215546Sopenharmony_ci 5016bf215546Sopenharmony_ci /* Force the workaround for unaligned access here already if the 5017bf215546Sopenharmony_ci * offset relative to the vertex buffer base is unaligned. 5018bf215546Sopenharmony_ci * 5019bf215546Sopenharmony_ci * There is a theoretical case in which this is too conservative: 5020bf215546Sopenharmony_ci * if the vertex buffer's offset is also unaligned in just the 5021bf215546Sopenharmony_ci * right way, we end up with an aligned address after all. 5022bf215546Sopenharmony_ci * However, this case should be extremely rare in practice (it 5023bf215546Sopenharmony_ci * won't happen in well-behaved applications), and taking it 5024bf215546Sopenharmony_ci * into account would complicate the fast path (where everything 5025bf215546Sopenharmony_ci * is nicely aligned). 
5026bf215546Sopenharmony_ci */ 5027bf215546Sopenharmony_ci bool check_alignment = 5028bf215546Sopenharmony_ci log_hw_load_size >= 1 && 5029bf215546Sopenharmony_ci (sscreen->info.gfx_level == GFX6 || sscreen->info.gfx_level >= GFX10); 5030bf215546Sopenharmony_ci bool opencode = sscreen->options.vs_fetch_always_opencode; 5031bf215546Sopenharmony_ci 5032bf215546Sopenharmony_ci if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0) 5033bf215546Sopenharmony_ci opencode = true; 5034bf215546Sopenharmony_ci 5035bf215546Sopenharmony_ci if (always_fix || check_alignment || opencode) 5036bf215546Sopenharmony_ci v->fix_fetch[i] = fix_fetch.bits; 5037bf215546Sopenharmony_ci 5038bf215546Sopenharmony_ci if (opencode) 5039bf215546Sopenharmony_ci v->fix_fetch_opencode |= 1 << i; 5040bf215546Sopenharmony_ci if (opencode || always_fix) 5041bf215546Sopenharmony_ci v->fix_fetch_always |= 1 << i; 5042bf215546Sopenharmony_ci 5043bf215546Sopenharmony_ci if (check_alignment && !opencode) { 5044bf215546Sopenharmony_ci assert(log_hw_load_size == 1 || log_hw_load_size == 2); 5045bf215546Sopenharmony_ci 5046bf215546Sopenharmony_ci v->fix_fetch_unaligned |= 1 << i; 5047bf215546Sopenharmony_ci v->hw_load_is_dword |= (log_hw_load_size - 1) << i; 5048bf215546Sopenharmony_ci v->vb_alignment_check_mask |= 1 << vbo_index; 5049bf215546Sopenharmony_ci } 5050bf215546Sopenharmony_ci 5051bf215546Sopenharmony_ci v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 5052bf215546Sopenharmony_ci S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 5053bf215546Sopenharmony_ci S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 5054bf215546Sopenharmony_ci S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])); 5055bf215546Sopenharmony_ci 5056bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX10) { 5057bf215546Sopenharmony_ci const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&sscreen->info)[elements[i].src_format]; 5058bf215546Sopenharmony_ci 
assert(fmt->img_format != 0 && fmt->img_format < 128); 5059bf215546Sopenharmony_ci v->rsrc_word3[i] |= S_008F0C_FORMAT(fmt->img_format) | 5060bf215546Sopenharmony_ci S_008F0C_RESOURCE_LEVEL(sscreen->info.gfx_level < GFX11); 5061bf215546Sopenharmony_ci } else { 5062bf215546Sopenharmony_ci unsigned data_format, num_format; 5063bf215546Sopenharmony_ci data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 5064bf215546Sopenharmony_ci num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 5065bf215546Sopenharmony_ci v->rsrc_word3[i] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); 5066bf215546Sopenharmony_ci } 5067bf215546Sopenharmony_ci } 5068bf215546Sopenharmony_ci 5069bf215546Sopenharmony_ci if (v->instance_divisor_is_fetched) { 5070bf215546Sopenharmony_ci unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched); 5071bf215546Sopenharmony_ci 5072bf215546Sopenharmony_ci v->instance_divisor_factor_buffer = (struct si_resource *)pipe_buffer_create( 5073bf215546Sopenharmony_ci &sscreen->b, 0, PIPE_USAGE_DEFAULT, num_divisors * sizeof(divisor_factors[0])); 5074bf215546Sopenharmony_ci if (!v->instance_divisor_factor_buffer) { 5075bf215546Sopenharmony_ci FREE(v); 5076bf215546Sopenharmony_ci return NULL; 5077bf215546Sopenharmony_ci } 5078bf215546Sopenharmony_ci void *map = 5079bf215546Sopenharmony_ci sscreen->ws->buffer_map(sscreen->ws, v->instance_divisor_factor_buffer->buf, NULL, PIPE_MAP_WRITE); 5080bf215546Sopenharmony_ci memcpy(map, divisor_factors, num_divisors * sizeof(divisor_factors[0])); 5081bf215546Sopenharmony_ci } 5082bf215546Sopenharmony_ci return v; 5083bf215546Sopenharmony_ci} 5084bf215546Sopenharmony_ci 5085bf215546Sopenharmony_cistatic void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 5086bf215546Sopenharmony_ci{ 5087bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 5088bf215546Sopenharmony_ci struct si_vertex_elements *old = 
sctx->vertex_elements; 5089bf215546Sopenharmony_ci struct si_vertex_elements *v = (struct si_vertex_elements *)state; 5090bf215546Sopenharmony_ci 5091bf215546Sopenharmony_ci if (!v) 5092bf215546Sopenharmony_ci v = sctx->no_velems_state; 5093bf215546Sopenharmony_ci 5094bf215546Sopenharmony_ci sctx->vertex_elements = v; 5095bf215546Sopenharmony_ci sctx->num_vertex_elements = v->count; 5096bf215546Sopenharmony_ci 5097bf215546Sopenharmony_ci if (sctx->num_vertex_elements) { 5098bf215546Sopenharmony_ci sctx->vertex_buffers_dirty = true; 5099bf215546Sopenharmony_ci } else { 5100bf215546Sopenharmony_ci sctx->vertex_buffers_dirty = false; 5101bf215546Sopenharmony_ci sctx->vertex_buffer_pointer_dirty = false; 5102bf215546Sopenharmony_ci sctx->vertex_buffer_user_sgprs_dirty = false; 5103bf215546Sopenharmony_ci } 5104bf215546Sopenharmony_ci 5105bf215546Sopenharmony_ci if (old->instance_divisor_is_one != v->instance_divisor_is_one || 5106bf215546Sopenharmony_ci old->instance_divisor_is_fetched != v->instance_divisor_is_fetched || 5107bf215546Sopenharmony_ci (old->vb_alignment_check_mask ^ v->vb_alignment_check_mask) & 5108bf215546Sopenharmony_ci sctx->vertex_buffer_unaligned || 5109bf215546Sopenharmony_ci ((v->vb_alignment_check_mask & sctx->vertex_buffer_unaligned) && 5110bf215546Sopenharmony_ci memcmp(old->vertex_buffer_index, v->vertex_buffer_index, 5111bf215546Sopenharmony_ci sizeof(v->vertex_buffer_index[0]) * MAX2(old->count, v->count))) || 5112bf215546Sopenharmony_ci /* fix_fetch_{always,opencode,unaligned} and hw_load_is_dword are 5113bf215546Sopenharmony_ci * functions of fix_fetch and the src_offset alignment. 5114bf215546Sopenharmony_ci * If they change and fix_fetch doesn't, it must be due to different 5115bf215546Sopenharmony_ci * src_offset alignment, which is reflected in fix_fetch_opencode. 
*/ 5116bf215546Sopenharmony_ci old->fix_fetch_opencode != v->fix_fetch_opencode || 5117bf215546Sopenharmony_ci memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * 5118bf215546Sopenharmony_ci MAX2(old->count, v->count))) { 5119bf215546Sopenharmony_ci si_vs_key_update_inputs(sctx); 5120bf215546Sopenharmony_ci sctx->do_update_shaders = true; 5121bf215546Sopenharmony_ci } 5122bf215546Sopenharmony_ci 5123bf215546Sopenharmony_ci if (v->instance_divisor_is_fetched) { 5124bf215546Sopenharmony_ci struct pipe_constant_buffer cb; 5125bf215546Sopenharmony_ci 5126bf215546Sopenharmony_ci cb.buffer = &v->instance_divisor_factor_buffer->b.b; 5127bf215546Sopenharmony_ci cb.user_buffer = NULL; 5128bf215546Sopenharmony_ci cb.buffer_offset = 0; 5129bf215546Sopenharmony_ci cb.buffer_size = 0xffffffff; 5130bf215546Sopenharmony_ci si_set_internal_const_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb); 5131bf215546Sopenharmony_ci } 5132bf215546Sopenharmony_ci} 5133bf215546Sopenharmony_ci 5134bf215546Sopenharmony_cistatic void si_delete_vertex_element(struct pipe_context *ctx, void *state) 5135bf215546Sopenharmony_ci{ 5136bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 5137bf215546Sopenharmony_ci struct si_vertex_elements *v = (struct si_vertex_elements *)state; 5138bf215546Sopenharmony_ci 5139bf215546Sopenharmony_ci if (sctx->vertex_elements == state) 5140bf215546Sopenharmony_ci si_bind_vertex_elements(ctx, sctx->no_velems_state); 5141bf215546Sopenharmony_ci 5142bf215546Sopenharmony_ci si_resource_reference(&v->instance_divisor_factor_buffer, NULL); 5143bf215546Sopenharmony_ci FREE(state); 5144bf215546Sopenharmony_ci} 5145bf215546Sopenharmony_ci 5146bf215546Sopenharmony_cistatic void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count, 5147bf215546Sopenharmony_ci unsigned unbind_num_trailing_slots, bool take_ownership, 5148bf215546Sopenharmony_ci const struct pipe_vertex_buffer *buffers) 5149bf215546Sopenharmony_ci{ 
5150bf215546Sopenharmony_ci struct si_context *sctx = (struct si_context *)ctx; 5151bf215546Sopenharmony_ci struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 5152bf215546Sopenharmony_ci unsigned updated_mask = u_bit_consecutive(start_slot, count + unbind_num_trailing_slots); 5153bf215546Sopenharmony_ci uint32_t orig_unaligned = sctx->vertex_buffer_unaligned; 5154bf215546Sopenharmony_ci uint32_t unaligned = 0; 5155bf215546Sopenharmony_ci int i; 5156bf215546Sopenharmony_ci 5157bf215546Sopenharmony_ci assert(start_slot + count + unbind_num_trailing_slots <= ARRAY_SIZE(sctx->vertex_buffer)); 5158bf215546Sopenharmony_ci 5159bf215546Sopenharmony_ci if (buffers) { 5160bf215546Sopenharmony_ci if (take_ownership) { 5161bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 5162bf215546Sopenharmony_ci const struct pipe_vertex_buffer *src = buffers + i; 5163bf215546Sopenharmony_ci struct pipe_vertex_buffer *dsti = dst + i; 5164bf215546Sopenharmony_ci struct pipe_resource *buf = src->buffer.resource; 5165bf215546Sopenharmony_ci unsigned slot_bit = 1 << (start_slot + i); 5166bf215546Sopenharmony_ci 5167bf215546Sopenharmony_ci /* Only unreference bound vertex buffers. (take_ownership) */ 5168bf215546Sopenharmony_ci pipe_resource_reference(&dsti->buffer.resource, NULL); 5169bf215546Sopenharmony_ci 5170bf215546Sopenharmony_ci if (src->buffer_offset & 3 || src->stride & 3) 5171bf215546Sopenharmony_ci unaligned |= slot_bit; 5172bf215546Sopenharmony_ci 5173bf215546Sopenharmony_ci si_context_add_resource_size(sctx, buf); 5174bf215546Sopenharmony_ci if (buf) 5175bf215546Sopenharmony_ci si_resource(buf)->bind_history |= SI_BIND_VERTEX_BUFFER; 5176bf215546Sopenharmony_ci } 5177bf215546Sopenharmony_ci /* take_ownership allows us to copy pipe_resource pointers without refcounting. 
*/ 5178bf215546Sopenharmony_ci memcpy(dst, buffers, count * sizeof(struct pipe_vertex_buffer)); 5179bf215546Sopenharmony_ci } else { 5180bf215546Sopenharmony_ci for (i = 0; i < count; i++) { 5181bf215546Sopenharmony_ci const struct pipe_vertex_buffer *src = buffers + i; 5182bf215546Sopenharmony_ci struct pipe_vertex_buffer *dsti = dst + i; 5183bf215546Sopenharmony_ci struct pipe_resource *buf = src->buffer.resource; 5184bf215546Sopenharmony_ci unsigned slot_bit = 1 << (start_slot + i); 5185bf215546Sopenharmony_ci 5186bf215546Sopenharmony_ci pipe_resource_reference(&dsti->buffer.resource, buf); 5187bf215546Sopenharmony_ci dsti->buffer_offset = src->buffer_offset; 5188bf215546Sopenharmony_ci dsti->stride = src->stride; 5189bf215546Sopenharmony_ci 5190bf215546Sopenharmony_ci if (dsti->buffer_offset & 3 || dsti->stride & 3) 5191bf215546Sopenharmony_ci unaligned |= slot_bit; 5192bf215546Sopenharmony_ci 5193bf215546Sopenharmony_ci si_context_add_resource_size(sctx, buf); 5194bf215546Sopenharmony_ci if (buf) 5195bf215546Sopenharmony_ci si_resource(buf)->bind_history |= SI_BIND_VERTEX_BUFFER; 5196bf215546Sopenharmony_ci } 5197bf215546Sopenharmony_ci } 5198bf215546Sopenharmony_ci } else { 5199bf215546Sopenharmony_ci for (i = 0; i < count; i++) 5200bf215546Sopenharmony_ci pipe_resource_reference(&dst[i].buffer.resource, NULL); 5201bf215546Sopenharmony_ci } 5202bf215546Sopenharmony_ci 5203bf215546Sopenharmony_ci for (i = 0; i < unbind_num_trailing_slots; i++) 5204bf215546Sopenharmony_ci pipe_resource_reference(&dst[count + i].buffer.resource, NULL); 5205bf215546Sopenharmony_ci 5206bf215546Sopenharmony_ci sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0; 5207bf215546Sopenharmony_ci sctx->vertex_buffer_unaligned = (orig_unaligned & ~updated_mask) | unaligned; 5208bf215546Sopenharmony_ci 5209bf215546Sopenharmony_ci /* Check whether alignment may have changed in a way that requires 5210bf215546Sopenharmony_ci * shader changes. 
This check is conservative: a vertex buffer can only 5211bf215546Sopenharmony_ci * trigger a shader change if the misalignment amount changes (e.g. 5212bf215546Sopenharmony_ci * from byte-aligned to short-aligned), but we only keep track of 5213bf215546Sopenharmony_ci * whether buffers are at least dword-aligned, since that should always 5214bf215546Sopenharmony_ci * be the case in well-behaved applications anyway. 5215bf215546Sopenharmony_ci */ 5216bf215546Sopenharmony_ci if ((sctx->vertex_elements->vb_alignment_check_mask & 5217bf215546Sopenharmony_ci (unaligned | orig_unaligned) & updated_mask)) { 5218bf215546Sopenharmony_ci si_vs_key_update_inputs(sctx); 5219bf215546Sopenharmony_ci sctx->do_update_shaders = true; 5220bf215546Sopenharmony_ci } 5221bf215546Sopenharmony_ci} 5222bf215546Sopenharmony_ci 5223bf215546Sopenharmony_cistatic struct pipe_vertex_state * 5224bf215546Sopenharmony_cisi_create_vertex_state(struct pipe_screen *screen, 5225bf215546Sopenharmony_ci struct pipe_vertex_buffer *buffer, 5226bf215546Sopenharmony_ci const struct pipe_vertex_element *elements, 5227bf215546Sopenharmony_ci unsigned num_elements, 5228bf215546Sopenharmony_ci struct pipe_resource *indexbuf, 5229bf215546Sopenharmony_ci uint32_t full_velem_mask) 5230bf215546Sopenharmony_ci{ 5231bf215546Sopenharmony_ci struct si_screen *sscreen = (struct si_screen *)screen; 5232bf215546Sopenharmony_ci struct si_vertex_state *state = CALLOC_STRUCT(si_vertex_state); 5233bf215546Sopenharmony_ci 5234bf215546Sopenharmony_ci util_init_pipe_vertex_state(screen, buffer, elements, num_elements, indexbuf, full_velem_mask, 5235bf215546Sopenharmony_ci &state->b); 5236bf215546Sopenharmony_ci 5237bf215546Sopenharmony_ci /* Initialize the vertex element state in state->element. 5238bf215546Sopenharmony_ci * Do it by creating a vertex element state object and copying it there. 
5239bf215546Sopenharmony_ci */ 5240bf215546Sopenharmony_ci struct si_context ctx = {}; 5241bf215546Sopenharmony_ci ctx.b.screen = screen; 5242bf215546Sopenharmony_ci struct si_vertex_elements *velems = si_create_vertex_elements(&ctx.b, num_elements, elements); 5243bf215546Sopenharmony_ci state->velems = *velems; 5244bf215546Sopenharmony_ci si_delete_vertex_element(&ctx.b, velems); 5245bf215546Sopenharmony_ci 5246bf215546Sopenharmony_ci assert(!state->velems.instance_divisor_is_one); 5247bf215546Sopenharmony_ci assert(!state->velems.instance_divisor_is_fetched); 5248bf215546Sopenharmony_ci assert(!state->velems.fix_fetch_always); 5249bf215546Sopenharmony_ci assert(buffer->stride % 4 == 0); 5250bf215546Sopenharmony_ci assert(buffer->buffer_offset % 4 == 0); 5251bf215546Sopenharmony_ci assert(!buffer->is_user_buffer); 5252bf215546Sopenharmony_ci for (unsigned i = 0; i < num_elements; i++) { 5253bf215546Sopenharmony_ci assert(elements[i].src_offset % 4 == 0); 5254bf215546Sopenharmony_ci assert(!elements[i].dual_slot); 5255bf215546Sopenharmony_ci } 5256bf215546Sopenharmony_ci 5257bf215546Sopenharmony_ci for (unsigned i = 0; i < num_elements; i++) { 5258bf215546Sopenharmony_ci si_set_vertex_buffer_descriptor(sscreen, &state->velems, &state->b.input.vbuffer, i, 5259bf215546Sopenharmony_ci &state->descriptors[i * 4]); 5260bf215546Sopenharmony_ci } 5261bf215546Sopenharmony_ci 5262bf215546Sopenharmony_ci return &state->b; 5263bf215546Sopenharmony_ci} 5264bf215546Sopenharmony_ci 5265bf215546Sopenharmony_cistatic void si_vertex_state_destroy(struct pipe_screen *screen, 5266bf215546Sopenharmony_ci struct pipe_vertex_state *state) 5267bf215546Sopenharmony_ci{ 5268bf215546Sopenharmony_ci pipe_vertex_buffer_unreference(&state->input.vbuffer); 5269bf215546Sopenharmony_ci pipe_resource_reference(&state->input.indexbuf, NULL); 5270bf215546Sopenharmony_ci FREE(state); 5271bf215546Sopenharmony_ci} 5272bf215546Sopenharmony_ci 5273bf215546Sopenharmony_cistatic struct pipe_vertex_state * 
5274bf215546Sopenharmony_cisi_pipe_create_vertex_state(struct pipe_screen *screen, 5275bf215546Sopenharmony_ci struct pipe_vertex_buffer *buffer, 5276bf215546Sopenharmony_ci const struct pipe_vertex_element *elements, 5277bf215546Sopenharmony_ci unsigned num_elements, 5278bf215546Sopenharmony_ci struct pipe_resource *indexbuf, 5279bf215546Sopenharmony_ci uint32_t full_velem_mask) 5280bf215546Sopenharmony_ci{ 5281bf215546Sopenharmony_ci struct si_screen *sscreen = (struct si_screen *)screen; 5282bf215546Sopenharmony_ci 5283bf215546Sopenharmony_ci return util_vertex_state_cache_get(screen, buffer, elements, num_elements, indexbuf, 5284bf215546Sopenharmony_ci full_velem_mask, &sscreen->vertex_state_cache); 5285bf215546Sopenharmony_ci}

/**
 * Destroy a vertex state object.
 *
 * Installed as sscreen->b.vertex_state_destroy. Forwards the request to
 * util_vertex_state_destroy() together with the screen's vertex state cache.
 *
 * \param screen  screen that owns the cache (a struct si_screen)
 * \param state   vertex state to destroy
 */
static void si_pipe_vertex_state_destroy(struct pipe_screen *screen,
                                         struct pipe_vertex_state *state)
{
   struct si_screen *sscreen = (struct si_screen *)screen;

   util_vertex_state_destroy(screen, &sscreen->vertex_state_cache, state);
}

/*
 * Misc
 */

/**
 * Set the default tessellation levels.
 *
 * The levels are packed into one float array (outer levels in elements 0-3,
 * inner levels in elements 4-5) and bound as a user constant buffer to the
 * internal slot SI_HS_CONST_DEFAULT_TESS_LEVELS.
 *
 * \param ctx                  context (a struct si_context)
 * \param default_outer_level  4 outer tessellation levels
 * \param default_inner_level  2 inner tessellation levels
 */
static void si_set_tess_state(struct pipe_context *ctx, const float default_outer_level[4],
                              const float default_inner_level[2])
{
   struct si_context *sctx = (struct si_context *)ctx;
   struct pipe_constant_buffer cb;
   /* The whole array (8 floats) is handed to the constant buffer below, but
    * only 6 floats are filled in. Zero-initialize it so that the two trailing
    * floats are never read as indeterminate stack contents.
    */
   float array[8] = {0};

   memcpy(array, default_outer_level, sizeof(float) * 4);
   memcpy(array + 4, default_inner_level, sizeof(float) * 2);

   cb.buffer = NULL;
   cb.user_buffer = array;
   cb.buffer_offset = 0;
   cb.buffer_size = sizeof(array);

   si_set_internal_const_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb);
}

/**
 * Record the number of vertices per patch in the context.
 *
 * Only stores the value in sctx->patch_vertices; nothing else is updated here.
 */
static void si_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices)
{
   struct si_context *sctx = (struct si_context *)ctx;

   sctx->patch_vertices = patch_vertices;
}

/**
 * Texture barrier.
 *
 * Updates the framebuffer dirtiness tracking and, if any bit of
 * framebuffer.uncompressed_cb_mask is set, makes the color buffers coherent
 * with shaders.
 *
 * \param ctx    context (a struct si_context)
 * \param flags  not used by this implementation
 */
static void si_texture_barrier(struct pipe_context *ctx, unsigned flags)
{
   struct si_context *sctx = (struct si_context *)ctx;

   si_update_fb_dirtiness_after_rendering(sctx);

   /* Multisample surfaces are flushed in si_decompress_textures. */
   if (sctx->framebuffer.uncompressed_cb_mask) {
      si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples,
                                 sctx->framebuffer.CB_has_shader_readable_metadata,
                                 sctx->framebuffer.all_DCC_pipe_aligned);
   }
}

/**
 * Memory barrier.
 *
 * This only ensures coherency for shader image/buffer stores.
 *
 * Translates PIPE_BARRIER_* bits into SI_CONTEXT_* flush/invalidate flags
 * that are accumulated in sctx->flags. Nothing is done if no bit other than
 * PIPE_BARRIER_UPDATE is set.
 *
 * \param ctx    context (a struct si_context)
 * \param flags  bitmask of PIPE_BARRIER_* values
 */
static void si_memory_barrier(struct pipe_context *ctx, unsigned flags)
{
   struct si_context *sctx = (struct si_context *)ctx;

   if (!(flags & ~PIPE_BARRIER_UPDATE))
      return;

   /* Subsequent commands must wait for all shader invocations to
    * complete. */
   sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH |
                  SI_CONTEXT_PFP_SYNC_ME;

   if (flags & PIPE_BARRIER_CONSTANT_BUFFER)
      sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE;

   if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_TEXTURE |
                PIPE_BARRIER_IMAGE | PIPE_BARRIER_STREAMOUT_BUFFER | PIPE_BARRIER_GLOBAL_BUFFER)) {
      /* As far as I can tell, L1 contents are written back to L2
       * automatically at end of shader, but the contents of other
       * L1 caches might still be stale. */
      sctx->flags |= SI_CONTEXT_INV_VCACHE;

      if (flags & (PIPE_BARRIER_IMAGE | PIPE_BARRIER_TEXTURE) &&
          sctx->screen->info.tcc_rb_non_coherent)
         sctx->flags |= SI_CONTEXT_INV_L2;
   }

   if (flags & PIPE_BARRIER_INDEX_BUFFER) {
      /* Indices are read through TC L2 since GFX8.
       * L1 isn't used.
       */
      if (sctx->screen->info.gfx_level <= GFX7)
         sctx->flags |= SI_CONTEXT_WB_L2;
   }

   /* MSAA color, any depth and any stencil are flushed in
    * si_decompress_textures when needed.
    */
   if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.uncompressed_cb_mask) {
      sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB;

      if (sctx->gfx_level <= GFX8)
         sctx->flags |= SI_CONTEXT_WB_L2;
   }

   /* Indirect buffers use TC L2 on GFX9, but not older hw. */
   if (sctx->screen->info.gfx_level <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER)
      sctx->flags |= SI_CONTEXT_WB_L2;
}

/**
 * Create a blend state object for one of the custom CB modes.
 *
 * The blend template is all zeros except for independent_blend_enable and a
 * full color write mask (0xf) on render target 0.
 *
 * \param sctx  context
 * \param mode  CB mode value passed through to si_create_blend_state_mode()
 * \return whatever si_create_blend_state_mode() returns
 */
static void *si_create_blend_custom(struct si_context *sctx, unsigned mode)
{
   struct pipe_blend_state blend;

   memset(&blend, 0, sizeof(blend));
   blend.independent_blend_enable = true;
   blend.rt[0].colormask = 0xf;
   return si_create_blend_state_mode(&sctx->b, &blend, mode);
}

/**
 * Install the state callbacks set up by this function: sampler states,
 * sampler views and the memory barrier.
 */
void si_init_state_compute_functions(struct si_context *sctx)
{
   sctx->b.create_sampler_state = si_create_sampler_state;
   sctx->b.delete_sampler_state = si_delete_sampler_state;
   sctx->b.create_sampler_view = si_create_sampler_view;
   sctx->b.sampler_view_destroy = si_sampler_view_destroy;
   sctx->b.memory_barrier = si_memory_barrier;
}

/**
 * Install the state atom emit functions and the context state callbacks, and
 * create the custom DSA and blend state objects kept in the context.
 */
void si_init_state_functions(struct si_context *sctx)
{
   /* State atom emit functions. */
   sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state;
   sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs;
   sctx->atoms.s.db_render_state.emit = si_emit_db_render_state;
   sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state;
   sctx->atoms.s.msaa_config.emit = si_emit_msaa_config;
   sctx->atoms.s.sample_mask.emit = si_emit_sample_mask;
   sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state;
   sctx->atoms.s.blend_color.emit = si_emit_blend_color;
   sctx->atoms.s.clip_regs.emit = si_emit_clip_regs;
   sctx->atoms.s.clip_state.emit = si_emit_clip_state;
   sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref;

   sctx->b.create_blend_state = si_create_blend_state;
   sctx->b.bind_blend_state = si_bind_blend_state;
   sctx->b.delete_blend_state = si_delete_blend_state;
   sctx->b.set_blend_color = si_set_blend_color;

   sctx->b.create_rasterizer_state = si_create_rs_state;
   sctx->b.bind_rasterizer_state = si_bind_rs_state;
   sctx->b.delete_rasterizer_state = si_delete_rs_state;

   sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state;
   sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state;
   sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state;

   sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx);

   /* These three custom blend states are only created before GFX11. */
   if (sctx->gfx_level < GFX11) {
      sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE);
      sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS);
      sctx->custom_blend_eliminate_fastclear =
         si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR);
   }

   /* The DCC decompress mode value differs between GFX11+ and older chips. */
   sctx->custom_blend_dcc_decompress =
      si_create_blend_custom(sctx, sctx->gfx_level >= GFX11 ?
                                      V_028808_CB_DCC_DECOMPRESS_GFX11 :
                                      V_028808_CB_DCC_DECOMPRESS_GFX8);

   sctx->b.set_clip_state = si_set_clip_state;
   sctx->b.set_stencil_ref = si_set_stencil_ref;

   sctx->b.set_framebuffer_state = si_set_framebuffer_state;

   sctx->b.set_sample_mask = si_set_sample_mask;

   sctx->b.create_vertex_elements_state = si_create_vertex_elements;
   sctx->b.bind_vertex_elements_state = si_bind_vertex_elements;
   sctx->b.delete_vertex_elements_state = si_delete_vertex_element;
   sctx->b.set_vertex_buffers = si_set_vertex_buffers;

   sctx->b.texture_barrier = si_texture_barrier;
   sctx->b.set_min_samples = si_set_min_samples;
   sctx->b.set_tess_state = si_set_tess_state;
   sctx->b.set_patch_vertices = si_set_patch_vertices;

   sctx->b.set_active_query_state = si_set_active_query_state;
}

/**
 * Install the screen-level callbacks, select the texture descriptor builder
 * for the chip generation and initialize the vertex state cache.
 */
void si_init_screen_state_functions(struct si_screen *sscreen)
{
   sscreen->b.is_format_supported = si_is_format_supported;
   sscreen->b.create_vertex_state = si_pipe_create_vertex_state;
   sscreen->b.vertex_state_destroy = si_pipe_vertex_state_destroy;

   if (sscreen->info.gfx_level >= GFX10) {
      sscreen->make_texture_descriptor = gfx10_make_texture_descriptor;
   } else {
      sscreen->make_texture_descriptor = si_make_texture_descriptor;
   }

   util_vertex_state_cache_init(&sscreen->vertex_state_cache,
                                si_create_vertex_state, si_vertex_state_destroy);
}

/**
 * Add a GRBM_GFX_INDEX register write to a PM4 state.
 *
 * The register offset depends on the chip generation: GFX7 and newer use
 * R_030800_GRBM_GFX_INDEX, older chips use R_00802C_GRBM_GFX_INDEX.
 */
static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value)
{
   unsigned reg = sctx->gfx_level >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX;
   si_pm4_set_reg(pm4, reg, value);
}

/**
 * Add a GRBM_GFX_INDEX write that selects one shader engine, or broadcasts
 * to all of them, with SH and instance broadcast always enabled.
 *
 * \param se  shader engine index (must be below info.max_se), or ~0 to
 *            broadcast to all shader engines
 */
static void si_set_grbm_gfx_index_se(struct si_context *sctx, struct si_pm4_state *pm4, unsigned se)
{
   assert(se == ~0 || se < sctx->screen->info.max_se);
   si_set_grbm_gfx_index(sctx, pm4,
                         (se == ~0 ? S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) |
                            S_030800_SH_BROADCAST_WRITES(1) |
                            S_030800_INSTANCE_BROADCAST_WRITES(1));
}

/**
 * Write per-shader-engine PA_SC_RASTER_CONFIG values obtained from
 * ac_get_harvested_configs(), then restore GRBM_GFX_INDEX to ~0 and, on
 * GFX7 and newer, write PA_SC_RASTER_CONFIG_1.
 *
 * \param raster_config    default PA_SC_RASTER_CONFIG value
 * \param raster_config_1  default PA_SC_RASTER_CONFIG_1 value; may be modified
 *                         by ac_get_harvested_configs() before it is written
 */
static void si_write_harvested_raster_configs(struct si_context *sctx, struct si_pm4_state *pm4,
                                              unsigned raster_config, unsigned raster_config_1)
{
   unsigned num_se = MAX2(sctx->screen->info.max_se, 1);
   /* NOTE(review): the array holds 4 entries while the loop below runs up to
    * num_se; this relies on max_se <= 4 for the chips that reach this path -
    * confirm.
    */
   unsigned raster_config_se[4];

   ac_get_harvested_configs(&sctx->screen->info, raster_config, &raster_config_1, raster_config_se);

   for (unsigned se = 0; se < num_se; se++) {
      si_set_grbm_gfx_index_se(sctx, pm4, se);
      si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]);
   }
   si_set_grbm_gfx_index(sctx, pm4, ~0);

   if (sctx->gfx_level >= GFX7) {
      si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
   }
}

/**
 * Add the raster config register writes to a PM4 state.
 *
 * The screen's default values are used when the enabled render backend mask
 * is zero or has at least as many bits set as there are render backends
 * (capped at 16); otherwise the harvested per-SE configuration is written.
 */
static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4)
{
   struct si_screen *sscreen = sctx->screen;
   unsigned num_rb = MIN2(sscreen->info.max_render_backends, 16);
   unsigned rb_mask = sscreen->info.enabled_rb_mask;
   unsigned raster_config = sscreen->pa_sc_raster_config;
   unsigned raster_config_1 = sscreen->pa_sc_raster_config_1;

   if (!rb_mask || util_bitcount(rb_mask) >= num_rb) {
      /* Always use the default config when all backends are enabled
       * (or when we failed to determine the enabled backends).
       */
      si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config);
      if (sctx->gfx_level >= GFX7)
         si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1);
   } else {
      si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1);
   }
}

/**
 * Return the CU enable mask to use for pixel shaders: the lowest
 * info.min_good_cu_per_sa bits set.
 */
unsigned gfx103_get_cu_mask_ps(struct si_screen *sscreen)
{
   /* It's wasteful to enable all CUs for PS if shader arrays have a different
    * number of CUs. The reason is that the hardware sends the same number of PS
    * waves to each shader array, so the slowest shader array limits the performance.
    * Disable the extra CUs for PS in other shader arrays to save power and thus
    * increase clocks for busy CUs. In the future, we might disable or enable this
    * tweak only for certain apps.
    */
   return u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa);
}

5552bf215546Sopenharmony_civoid si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) 5553bf215546Sopenharmony_ci{ 5554bf215546Sopenharmony_ci struct si_screen *sscreen = sctx->screen; 5555bf215546Sopenharmony_ci uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 5556bf215546Sopenharmony_ci bool has_clear_state = sscreen->info.has_clear_state; 5557bf215546Sopenharmony_ci 5558bf215546Sopenharmony_ci struct si_cs_preamble { 5559bf215546Sopenharmony_ci struct si_pm4_state pm4; 5560bf215546Sopenharmony_ci uint32_t more_pm4[150]; /* Add more space because the preamble is large. */ 5561bf215546Sopenharmony_ci }; 5562bf215546Sopenharmony_ci struct si_pm4_state *pm4 = (struct si_pm4_state *)CALLOC_STRUCT(si_cs_preamble); 5563bf215546Sopenharmony_ci 5564bf215546Sopenharmony_ci if (!pm4) 5565bf215546Sopenharmony_ci return; 5566bf215546Sopenharmony_ci 5567bf215546Sopenharmony_ci /* Add all the space that we allocated.
*/ 5568bf215546Sopenharmony_ci pm4->max_dw = sizeof(struct si_cs_preamble) - offsetof(struct si_cs_preamble, pm4.pm4); 5569bf215546Sopenharmony_ci 5570bf215546Sopenharmony_ci if (!uses_reg_shadowing) { 5571bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 5572bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)); 5573bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1)); 5574bf215546Sopenharmony_ci 5575bf215546Sopenharmony_ci if (sscreen->dpbb_allowed) { 5576bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0)); 5577bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 5578bf215546Sopenharmony_ci } 5579bf215546Sopenharmony_ci 5580bf215546Sopenharmony_ci if (has_clear_state) { 5581bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0)); 5582bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); 5583bf215546Sopenharmony_ci } 5584bf215546Sopenharmony_ci } 5585bf215546Sopenharmony_ci 5586bf215546Sopenharmony_ci /* CLEAR_STATE doesn't restore these correctly. 
*/ 5587bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 5588bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 5589bf215546Sopenharmony_ci S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 5590bf215546Sopenharmony_ci 5591bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 5592bf215546Sopenharmony_ci if (!has_clear_state) 5593bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 5594bf215546Sopenharmony_ci 5595bf215546Sopenharmony_ci if (!has_clear_state) { 5596bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 5597bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 5598bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 5599bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 5600bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); 5601bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 5602bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 5603bf215546Sopenharmony_ci 5604bf215546Sopenharmony_ci if (sctx->gfx_level < GFX11) { 5605bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 5606bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 5607bf215546Sopenharmony_ci } 5608bf215546Sopenharmony_ci } 5609bf215546Sopenharmony_ci 5610bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 5611bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX7) 5612bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); 5613bf215546Sopenharmony_ci 5614bf215546Sopenharmony_ci if (sctx->gfx_level == GFX6) { 5615bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, 
5616bf215546Sopenharmony_ci S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); 5617bf215546Sopenharmony_ci } 5618bf215546Sopenharmony_ci 5619bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX7) { 5620bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0); 5621bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0); 5622bf215546Sopenharmony_ci } else { 5623bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_008A60_PA_SU_LINE_STIPPLE_VALUE, 0); 5624bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_008B10_PA_SC_LINE_STIPPLE_STATE, 0); 5625bf215546Sopenharmony_ci } 5626bf215546Sopenharmony_ci 5627bf215546Sopenharmony_ci if (sctx->gfx_level <= GFX7 || !has_clear_state) { 5628bf215546Sopenharmony_ci if (sctx->gfx_level < GFX11) { 5629bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 5630bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 5631bf215546Sopenharmony_ci } 5632bf215546Sopenharmony_ci 5633bf215546Sopenharmony_ci /* CLEAR_STATE doesn't clear these correctly on certain generations. 5634bf215546Sopenharmony_ci * I don't know why. Deduced by trial and error. 
5635bf215546Sopenharmony_ci */ 5636bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 5637bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 5638bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 5639bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 5640bf215546Sopenharmony_ci S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 5641bf215546Sopenharmony_ci } 5642bf215546Sopenharmony_ci 5643bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10 && sctx->gfx_level < GFX11) { 5644bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, 5645bf215546Sopenharmony_ci S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | 5646bf215546Sopenharmony_ci S_028038_POPS_DRAIN_PS_ON_OVERLAP(1)); 5647bf215546Sopenharmony_ci } 5648bf215546Sopenharmony_ci 5649bf215546Sopenharmony_ci unsigned cu_mask_ps = 0xffffffff; 5650bf215546Sopenharmony_ci 5651bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10_3) 5652bf215546Sopenharmony_ci cu_mask_ps = gfx103_get_cu_mask_ps(sscreen); 5653bf215546Sopenharmony_ci 5654bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX7) { 5655bf215546Sopenharmony_ci ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 5656bf215546Sopenharmony_ci S_00B01C_CU_EN(cu_mask_ps) | 5657bf215546Sopenharmony_ci S_00B01C_WAVE_LIMIT(0x3F) | 5658bf215546Sopenharmony_ci S_00B01C_LDS_GROUP_SIZE(sctx->gfx_level >= GFX11), 5659bf215546Sopenharmony_ci C_00B01C_CU_EN, 0, &sscreen->info, 5660bf215546Sopenharmony_ci (void*)(sctx->gfx_level >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg)); 5661bf215546Sopenharmony_ci } 5662bf215546Sopenharmony_ci 5663bf215546Sopenharmony_ci if (sctx->gfx_level <= GFX8) { 5664bf215546Sopenharmony_ci si_set_raster_config(sctx, pm4); 5665bf215546Sopenharmony_ci 5666bf215546Sopenharmony_ci /* FIXME calculate these values somehow ??? 
*/ 5667bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 5668bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 5669bf215546Sopenharmony_ci 5670bf215546Sopenharmony_ci /* These registers, when written, also overwrite the CLEAR_STATE 5671bf215546Sopenharmony_ci * context, so we can't rely on CLEAR_STATE setting them. 5672bf215546Sopenharmony_ci * It would be an issue if there was another UMD changing them. 5673bf215546Sopenharmony_ci */ 5674bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 5675bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 5676bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 5677bf215546Sopenharmony_ci } 5678bf215546Sopenharmony_ci 5679bf215546Sopenharmony_ci if (sscreen->info.gfx_level >= GFX10) { 5680bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, 5681bf215546Sopenharmony_ci S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); 5682bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, 5683bf215546Sopenharmony_ci S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); 5684bf215546Sopenharmony_ci } else if (sscreen->info.gfx_level == GFX9) { 5685bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, 5686bf215546Sopenharmony_ci S_00B414_MEM_BASE(sscreen->info.address32_hi >> 8)); 5687bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, 5688bf215546Sopenharmony_ci S_00B214_MEM_BASE(sscreen->info.address32_hi >> 8)); 5689bf215546Sopenharmony_ci } else { 5690bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, 5691bf215546Sopenharmony_ci S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); 5692bf215546Sopenharmony_ci } 5693bf215546Sopenharmony_ci 5694bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX7 && sctx->gfx_level <= GFX8) { 5695bf215546Sopenharmony_ci ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 
5696bf215546Sopenharmony_ci S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F), 5697bf215546Sopenharmony_ci C_00B51C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg); 5698bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F)); 5699bf215546Sopenharmony_ci ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 5700bf215546Sopenharmony_ci S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F), 5701bf215546Sopenharmony_ci C_00B31C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg); 5702bf215546Sopenharmony_ci 5703bf215546Sopenharmony_ci /* If this is 0, Bonaire can hang even if GS isn't being used. 5704bf215546Sopenharmony_ci * Other chips are unaffected. These are suboptimal values, 5705bf215546Sopenharmony_ci * but we don't use on-chip GS. 5706bf215546Sopenharmony_ci */ 5707bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, 5708bf215546Sopenharmony_ci S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4)); 5709bf215546Sopenharmony_ci } 5710bf215546Sopenharmony_ci 5711bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX8) { 5712bf215546Sopenharmony_ci unsigned vgt_tess_distribution; 5713bf215546Sopenharmony_ci 5714bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 5715bf215546Sopenharmony_ci /* ACCUM fields changed their meaning. 
*/ 5716bf215546Sopenharmony_ci vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(255) | 5717bf215546Sopenharmony_ci S_028B50_ACCUM_TRI(255) | 5718bf215546Sopenharmony_ci S_028B50_ACCUM_QUAD(255) | 5719bf215546Sopenharmony_ci S_028B50_DONUT_SPLIT_GFX9(24) | 5720bf215546Sopenharmony_ci S_028B50_TRAP_SPLIT(6); 5721bf215546Sopenharmony_ci } else if (sctx->gfx_level >= GFX9) { 5722bf215546Sopenharmony_ci vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(12) | 5723bf215546Sopenharmony_ci S_028B50_ACCUM_TRI(30) | 5724bf215546Sopenharmony_ci S_028B50_ACCUM_QUAD(24) | 5725bf215546Sopenharmony_ci S_028B50_DONUT_SPLIT_GFX9(24) | 5726bf215546Sopenharmony_ci S_028B50_TRAP_SPLIT(6); 5727bf215546Sopenharmony_ci } else if (sctx->gfx_level == GFX8) { 5728bf215546Sopenharmony_ci vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | 5729bf215546Sopenharmony_ci S_028B50_ACCUM_TRI(11) | 5730bf215546Sopenharmony_ci S_028B50_ACCUM_QUAD(11) | 5731bf215546Sopenharmony_ci S_028B50_DONUT_SPLIT_GFX81(16); 5732bf215546Sopenharmony_ci 5733bf215546Sopenharmony_ci /* Testing with Unigine Heaven extreme tesselation yielded best results 5734bf215546Sopenharmony_ci * with TRAP_SPLIT = 3. 
5735bf215546Sopenharmony_ci */ 5736bf215546Sopenharmony_ci if (sctx->family == CHIP_FIJI || sctx->family >= CHIP_POLARIS10) 5737bf215546Sopenharmony_ci vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 5738bf215546Sopenharmony_ci } 5739bf215546Sopenharmony_ci 5740bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 5741bf215546Sopenharmony_ci } 5742bf215546Sopenharmony_ci 5743bf215546Sopenharmony_ci if (sscreen->info.gfx_level <= GFX9) { 5744bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); 5745bf215546Sopenharmony_ci } 5746bf215546Sopenharmony_ci 5747bf215546Sopenharmony_ci if (sctx->gfx_level == GFX9) { 5748bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); 5749bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); 5750bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0); 5751bf215546Sopenharmony_ci 5752bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, 5753bf215546Sopenharmony_ci S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | 5754bf215546Sopenharmony_ci S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); 5755bf215546Sopenharmony_ci } 5756bf215546Sopenharmony_ci 5757bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX9) { 5758bf215546Sopenharmony_ci ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 5759bf215546Sopenharmony_ci S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN, 5760bf215546Sopenharmony_ci 0, &sscreen->info, 5761bf215546Sopenharmony_ci (void*)(sctx->gfx_level >= GFX10 ? 
si_pm4_set_reg_idx3 : si_pm4_set_reg)); 5762bf215546Sopenharmony_ci 5763bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, 5764bf215546Sopenharmony_ci S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) | 5765bf215546Sopenharmony_ci S_028C48_MAX_PRIM_PER_BATCH(1023)); 5766bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 5767bf215546Sopenharmony_ci S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); 5768bf215546Sopenharmony_ci 5769bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); 5770bf215546Sopenharmony_ci 5771bf215546Sopenharmony_ci if (sctx->gfx_level < GFX11) { 5772bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 5773bf215546Sopenharmony_ci sctx->gfx_level >= GFX10 ? 0x20 : 0); 5774bf215546Sopenharmony_ci } 5775bf215546Sopenharmony_ci } 5776bf215546Sopenharmony_ci 5777bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10) { 5778bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); 5779bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); 5780bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0); 5781bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0); 5782bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0); 5783bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0); 5784bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0); 5785bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0); 5786bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0); 5787bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0); 5788bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0); 5789bf215546Sopenharmony_ci si_pm4_set_reg(pm4, 
R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0); 5790bf215546Sopenharmony_ci 5791bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, 5792bf215546Sopenharmony_ci S_00B0C0_SOFT_GROUPING_EN(1) | 5793bf215546Sopenharmony_ci S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); 5794bf215546Sopenharmony_ci 5795bf215546Sopenharmony_ci /* Enable CMASK/HTILE/DCC caching in L2 for small chips. */ 5796bf215546Sopenharmony_ci unsigned meta_write_policy, meta_read_policy; 5797bf215546Sopenharmony_ci unsigned no_alloc = sctx->gfx_level >= GFX11 ? V_02807C_CACHE_NOA_GFX11: 5798bf215546Sopenharmony_ci V_02807C_CACHE_NOA_GFX10; 5799bf215546Sopenharmony_ci if (sscreen->info.max_render_backends <= 4) { 5800bf215546Sopenharmony_ci meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */ 5801bf215546Sopenharmony_ci meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */ 5802bf215546Sopenharmony_ci } else { 5803bf215546Sopenharmony_ci meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */ 5804bf215546Sopenharmony_ci meta_read_policy = no_alloc; /* don't cache reads that miss */ 5805bf215546Sopenharmony_ci } 5806bf215546Sopenharmony_ci 5807bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL, 5808bf215546Sopenharmony_ci S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | 5809bf215546Sopenharmony_ci S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) | 5810bf215546Sopenharmony_ci S_02807C_HTILE_WR_POLICY(meta_write_policy) | 5811bf215546Sopenharmony_ci S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | 5812bf215546Sopenharmony_ci S_02807C_Z_RD_POLICY(no_alloc) | 5813bf215546Sopenharmony_ci S_02807C_S_RD_POLICY(no_alloc) | 5814bf215546Sopenharmony_ci S_02807C_HTILE_RD_POLICY(meta_read_policy)); 5815bf215546Sopenharmony_ci 5816bf215546Sopenharmony_ci unsigned gl2_cc; 5817bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) 5818bf215546Sopenharmony_ci gl2_cc = S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) | 5819bf215546Sopenharmony_ci 
S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) | 5820bf215546Sopenharmony_ci S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11); 5821bf215546Sopenharmony_ci else 5822bf215546Sopenharmony_ci gl2_cc = S_028410_CMASK_WR_POLICY(meta_write_policy) | 5823bf215546Sopenharmony_ci S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) | 5824bf215546Sopenharmony_ci S_028410_DCC_WR_POLICY_GFX10(meta_write_policy) | 5825bf215546Sopenharmony_ci S_028410_COLOR_WR_POLICY_GFX10(V_028410_CACHE_STREAM) | 5826bf215546Sopenharmony_ci S_028410_CMASK_RD_POLICY(meta_read_policy) | 5827bf215546Sopenharmony_ci S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA_GFX10) | 5828bf215546Sopenharmony_ci S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX10); 5829bf215546Sopenharmony_ci 5830bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL, 5831bf215546Sopenharmony_ci gl2_cc | 5832bf215546Sopenharmony_ci S_028410_DCC_RD_POLICY(meta_read_policy)); 5833bf215546Sopenharmony_ci 5834bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0); 5835bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0); 5836bf215546Sopenharmony_ci 5837bf215546Sopenharmony_ci /* Break up a pixel wave if it contains deallocs for more than 5838bf215546Sopenharmony_ci * half the parameter cache. 5839bf215546Sopenharmony_ci * 5840bf215546Sopenharmony_ci * To avoid a deadlock where pixel waves aren't launched 5841bf215546Sopenharmony_ci * because they're waiting for more pixels while the frontend 5842bf215546Sopenharmony_ci * is stuck waiting for PC space, the maximum allowed value is 5843bf215546Sopenharmony_ci * the size of the PC minus the largest possible allocation for 5844bf215546Sopenharmony_ci * a single primitive shader subgroup. 5845bf215546Sopenharmony_ci */ 5846bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, 5847bf215546Sopenharmony_ci S_028C50_MAX_DEALLOCS_IN_WAVE(sctx->gfx_level >= GFX11 ? 
16 : 512)); 5848bf215546Sopenharmony_ci 5849bf215546Sopenharmony_ci if (sctx->gfx_level < GFX11) { 5850bf215546Sopenharmony_ci /* Reuse for legacy (non-NGG) only. */ 5851bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 5852bf215546Sopenharmony_ci } 5853bf215546Sopenharmony_ci 5854bf215546Sopenharmony_ci if (!has_clear_state) { 5855bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 5856bf215546Sopenharmony_ci sscreen->info.pa_sc_tile_steering_override); 5857bf215546Sopenharmony_ci } 5858bf215546Sopenharmony_ci 5859bf215546Sopenharmony_ci 5860bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0); 5861bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0); 5862bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0); 5863bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0); 5864bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0); 5865bf215546Sopenharmony_ci } 5866bf215546Sopenharmony_ci 5867bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10 && sctx->gfx_level <= GFX10_3) { 5868bf215546Sopenharmony_ci /* Logical CUs 16 - 31 */ 5869bf215546Sopenharmony_ci ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16), 5870bf215546Sopenharmony_ci C_00B004_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3); 5871bf215546Sopenharmony_ci ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff), 5872bf215546Sopenharmony_ci C_00B104_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3); 5873bf215546Sopenharmony_ci ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff), 5874bf215546Sopenharmony_ci C_00B404_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3); 5875bf215546Sopenharmony_ci 5876bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); 5877bf215546Sopenharmony_ci si_pm4_set_reg(pm4, 
R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0); 5878bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0); 5879bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0); 5880bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0); 5881bf215546Sopenharmony_ci } 5882bf215546Sopenharmony_ci 5883bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX10_3) { 5884bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); 5885bf215546Sopenharmony_ci /* The rate combiners have no effect if they are disabled like this: 5886bf215546Sopenharmony_ci * VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 1 5887bf215546Sopenharmony_ci * PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1 5888bf215546Sopenharmony_ci * HTILE_RATE: VRS_HTILE_ENCODING = 0 5889bf215546Sopenharmony_ci * SAMPLE_ITER: PS_ITER_SAMPLE = 0 5890bf215546Sopenharmony_ci * 5891bf215546Sopenharmony_ci * Use OVERRIDE, which will ignore results from previous combiners. 5892bf215546Sopenharmony_ci * (e.g. enabled sample shading overrides the vertex rate) 5893bf215546Sopenharmony_ci */ 5894bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL, 5895bf215546Sopenharmony_ci S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) | 5896bf215546Sopenharmony_ci S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE)); 5897bf215546Sopenharmony_ci } 5898bf215546Sopenharmony_ci 5899bf215546Sopenharmony_ci if (sctx->gfx_level >= GFX11) { 5900bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028C54_PA_SC_BINNER_CNTL_2, 0); 5901bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_028620_PA_RATE_CNTL, 5902bf215546Sopenharmony_ci S_028620_VERTEX_RATE(2) | S_028620_PRIM_RATE(1)); 5903bf215546Sopenharmony_ci 5904bf215546Sopenharmony_ci /* We must wait for idle using an EOP event before changing the attribute ring registers. 
5905bf215546Sopenharmony_ci * Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory. 5906bf215546Sopenharmony_ci */ 5907bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, PKT3(PKT3_RELEASE_MEM, 6, 0)); 5908bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | 5909bf215546Sopenharmony_ci S_490_EVENT_INDEX(5) | 5910bf215546Sopenharmony_ci S_490_PWS_ENABLE(1)); 5911bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); /* DST_SEL, INT_SEL, DATA_SEL */ 5912bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); /* ADDRESS_LO */ 5913bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); /* ADDRESS_HI */ 5914bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); /* DATA_LO */ 5915bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); /* DATA_HI */ 5916bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); /* INT_CTXID */ 5917bf215546Sopenharmony_ci 5918bf215546Sopenharmony_ci /* Wait for the PWS counter. */ 5919bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0)); 5920bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, S_580_PWS_STAGE_SEL(V_580_CP_ME) | 5921bf215546Sopenharmony_ci S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | 5922bf215546Sopenharmony_ci S_580_PWS_ENA2(1) | 5923bf215546Sopenharmony_ci S_580_PWS_COUNT(0)); 5924bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0xffffffff); /* GCR_SIZE */ 5925bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0x01ffffff); /* GCR_SIZE_HI */ 5926bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); /* GCR_BASE_LO */ 5927bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); /* GCR_BASE_HI */ 5928bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, S_585_PWS_ENA(1)); 5929bf215546Sopenharmony_ci si_pm4_cmd_add(pm4, 0); /* GCR_CNTL */ 5930bf215546Sopenharmony_ci 5931bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123); 5932bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_031114_SPI_GS_THROTTLE_CNTL2, 0x1544D); 5933bf215546Sopenharmony_ci 5934bf215546Sopenharmony_ci 
assert((sscreen->attribute_ring->gpu_address >> 32) == sscreen->info.address32_hi); 5935bf215546Sopenharmony_ci 5936bf215546Sopenharmony_ci /* The PS will read inputs from this address. */ 5937bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_031118_SPI_ATTRIBUTE_RING_BASE, 5938bf215546Sopenharmony_ci sscreen->attribute_ring->gpu_address >> 16); 5939bf215546Sopenharmony_ci si_pm4_set_reg(pm4, R_03111C_SPI_ATTRIBUTE_RING_SIZE, 5940bf215546Sopenharmony_ci S_03111C_MEM_SIZE(((sscreen->attribute_ring->bo_size / 5941bf215546Sopenharmony_ci sscreen->info.max_se) >> 16) - 1) | 5942bf215546Sopenharmony_ci S_03111C_BIG_PAGE(sscreen->info.discardable_allows_big_page) | 5943bf215546Sopenharmony_ci S_03111C_L1_POLICY(1)); 5944bf215546Sopenharmony_ci } 5945bf215546Sopenharmony_ci 5946bf215546Sopenharmony_ci sctx->cs_preamble_state = pm4; 5947bf215546Sopenharmony_ci 5948bf215546Sopenharmony_ci /* Make a copy of the preamble for TMZ. */ 5949bf215546Sopenharmony_ci sctx->cs_preamble_state_tmz = (struct si_pm4_state *)CALLOC_STRUCT(si_cs_preamble); 5950bf215546Sopenharmony_ci memcpy(sctx->cs_preamble_state_tmz, sctx->cs_preamble_state, sizeof(struct si_cs_preamble)); 5951bf215546Sopenharmony_ci} 5952