1/* 2 * Copyright 2012 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 */ 24 25#include "si_build_pm4.h" 26#include "si_query.h" 27#include "si_shader_internal.h" 28#include "sid.h" 29#include "util/fast_idiv_by_const.h" 30#include "util/format/u_format.h" 31#include "util/format/u_format_s3tc.h" 32#include "util/u_dual_blend.h" 33#include "util/u_helpers.h" 34#include "util/u_memory.h" 35#include "util/u_resource.h" 36#include "util/u_upload_mgr.h" 37#include "util/u_blend.h" 38 39#include "gfx10_format_table.h" 40 41static unsigned si_map_swizzle(unsigned swizzle) 42{ 43 switch (swizzle) { 44 case PIPE_SWIZZLE_Y: 45 return V_008F0C_SQ_SEL_Y; 46 case PIPE_SWIZZLE_Z: 47 return V_008F0C_SQ_SEL_Z; 48 case PIPE_SWIZZLE_W: 49 return V_008F0C_SQ_SEL_W; 50 case PIPE_SWIZZLE_0: 51 return V_008F0C_SQ_SEL_0; 52 case PIPE_SWIZZLE_1: 53 return V_008F0C_SQ_SEL_1; 54 default: /* PIPE_SWIZZLE_X */ 55 return V_008F0C_SQ_SEL_X; 56 } 57} 58 59/* 12.4 fixed-point */ 60static unsigned si_pack_float_12p4(float x) 61{ 62 return x <= 0 ? 0 : x >= 4096 ? 0xffff : x * 16; 63} 64 65/* 66 * Inferred framebuffer and blender state. 67 * 68 * CB_TARGET_MASK is emitted here to avoid a hang with dual source blending 69 * if there is not enough PS outputs. 70 */ 71static void si_emit_cb_render_state(struct si_context *sctx) 72{ 73 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 74 struct si_state_blend *blend = sctx->queued.named.blend; 75 /* CB_COLORn_INFO.FORMAT=INVALID should disable unbound colorbuffers, 76 * but you never know. */ 77 uint32_t cb_target_mask = sctx->framebuffer.colorbuf_enabled_4bit & blend->cb_target_mask; 78 unsigned i; 79 80 /* Avoid a hang that happens when dual source blending is enabled 81 * but there is not enough color outputs. This is undefined behavior, 82 * so disable color writes completely. 83 * 84 * Reproducible with Unigine Heaven 4.0 and drirc missing. 
85 */ 86 if (blend->dual_src_blend && sctx->shader.ps.cso && 87 (sctx->shader.ps.cso->info.colors_written & 0x3) != 0x3) 88 cb_target_mask = 0; 89 90 /* GFX9: Flush DFSM when CB_TARGET_MASK changes. 91 * I think we don't have to do anything between IBs. 92 */ 93 if (sctx->screen->dpbb_allowed && sctx->last_cb_target_mask != cb_target_mask) { 94 sctx->last_cb_target_mask = cb_target_mask; 95 96 radeon_begin(cs); 97 radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 98 radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 99 radeon_end(); 100 } 101 102 radeon_begin(cs); 103 radeon_opt_set_context_reg(sctx, R_028238_CB_TARGET_MASK, SI_TRACKED_CB_TARGET_MASK, 104 cb_target_mask); 105 106 if (sctx->gfx_level >= GFX8) { 107 /* DCC MSAA workaround. 108 * Alternatively, we can set CB_COLORi_DCC_CONTROL.OVERWRITE_- 109 * COMBINER_DISABLE, but that would be more complicated. 110 */ 111 bool oc_disable = 112 blend->dcc_msaa_corruption_4bit & cb_target_mask && sctx->framebuffer.nr_samples >= 2; 113 114 if (sctx->gfx_level >= GFX11) { 115 radeon_opt_set_context_reg(sctx, R_028424_CB_FDCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, 116 S_028424_SAMPLE_MASK_TRACKER_DISABLE(oc_disable) | 117 S_028424_SAMPLE_MASK_TRACKER_WATERMARK(15)); 118 } else { 119 radeon_opt_set_context_reg( 120 sctx, R_028424_CB_DCC_CONTROL, SI_TRACKED_CB_DCC_CONTROL, 121 S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(sctx->gfx_level <= GFX9) | 122 S_028424_OVERWRITE_COMBINER_WATERMARK(sctx->gfx_level >= GFX10 ? 6 : 4) | 123 S_028424_OVERWRITE_COMBINER_DISABLE(oc_disable) | 124 S_028424_DISABLE_CONSTANT_ENCODE_REG(sctx->gfx_level < GFX11 && 125 sctx->screen->info.has_dcc_constant_encode)); 126 } 127 } 128 129 /* RB+ register settings. */ 130 if (sctx->screen->info.rbplus_allowed) { 131 unsigned spi_shader_col_format = 132 sctx->shader.ps.cso ? 
sctx->shader.ps.current->key.ps.part.epilog.spi_shader_col_format 133 : 0; 134 unsigned sx_ps_downconvert = 0; 135 unsigned sx_blend_opt_epsilon = 0; 136 unsigned sx_blend_opt_control = 0; 137 138 for (i = 0; i < sctx->framebuffer.state.nr_cbufs; i++) { 139 struct si_surface *surf = (struct si_surface *)sctx->framebuffer.state.cbufs[i]; 140 unsigned format, swap, spi_format, colormask; 141 bool has_alpha, has_rgb; 142 143 if (!surf) { 144 /* If the color buffer is not set, the driver sets 32_R 145 * as the SPI color format, because the hw doesn't allow 146 * holes between color outputs, so also set this to 147 * enable RB+. 148 */ 149 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 150 continue; 151 } 152 153 format = sctx->gfx_level >= GFX11 ? G_028C70_FORMAT_GFX11(surf->cb_color_info): 154 G_028C70_FORMAT_GFX6(surf->cb_color_info); 155 swap = G_028C70_COMP_SWAP(surf->cb_color_info); 156 spi_format = (spi_shader_col_format >> (i * 4)) & 0xf; 157 colormask = (cb_target_mask >> (i * 4)) & 0xf; 158 159 /* Set if RGB and A are present. */ 160 has_alpha = !(sctx->gfx_level >= GFX11 ? G_028C74_FORCE_DST_ALPHA_1_GFX11(surf->cb_color_attrib): 161 G_028C74_FORCE_DST_ALPHA_1_GFX6(surf->cb_color_attrib)); 162 163 if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || 164 format == V_028C70_COLOR_32) 165 has_rgb = !has_alpha; 166 else 167 has_rgb = true; 168 169 /* Check the colormask and export format. */ 170 if (!(colormask & (PIPE_MASK_RGBA & ~PIPE_MASK_A))) 171 has_rgb = false; 172 if (!(colormask & PIPE_MASK_A)) 173 has_alpha = false; 174 175 if (spi_format == V_028714_SPI_SHADER_ZERO) { 176 has_rgb = false; 177 has_alpha = false; 178 } 179 180 /* Disable value checking for disabled channels. */ 181 if (!has_rgb) 182 sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4); 183 if (!has_alpha) 184 sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4); 185 186 /* Enable down-conversion for 32bpp and smaller formats. 
*/ 187 switch (format) { 188 case V_028C70_COLOR_8: 189 case V_028C70_COLOR_8_8: 190 case V_028C70_COLOR_8_8_8_8: 191 /* For 1 and 2-channel formats, use the superset thereof. */ 192 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR || 193 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 194 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 195 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4); 196 sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4); 197 } 198 break; 199 200 case V_028C70_COLOR_5_6_5: 201 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 202 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4); 203 sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4); 204 } 205 break; 206 207 case V_028C70_COLOR_1_5_5_5: 208 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 209 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4); 210 sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4); 211 } 212 break; 213 214 case V_028C70_COLOR_4_4_4_4: 215 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 216 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4); 217 sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4); 218 } 219 break; 220 221 case V_028C70_COLOR_32: 222 if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R) 223 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4); 224 else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR) 225 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4); 226 break; 227 228 case V_028C70_COLOR_16: 229 case V_028C70_COLOR_16_16: 230 /* For 1-channel formats, use the superset thereof. 
*/ 231 if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR || 232 spi_format == V_028714_SPI_SHADER_SNORM16_ABGR || 233 spi_format == V_028714_SPI_SHADER_UINT16_ABGR || 234 spi_format == V_028714_SPI_SHADER_SINT16_ABGR) { 235 if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV) 236 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4); 237 else 238 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4); 239 } 240 break; 241 242 case V_028C70_COLOR_10_11_11: 243 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 244 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4); 245 break; 246 247 case V_028C70_COLOR_2_10_10_10: 248 case V_028C70_COLOR_10_10_10_2: 249 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) { 250 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4); 251 sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4); 252 } 253 break; 254 255 case V_028C70_COLOR_5_9_9_9: 256 if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) 257 sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4); 258 break; 259 } 260 } 261 262 /* If there are no color outputs, the first color export is 263 * always enabled as 32_R, so also set this to enable RB+. 
264 */ 265 if (!sx_ps_downconvert) 266 sx_ps_downconvert = V_028754_SX_RT_EXPORT_32_R; 267 268 /* SX_PS_DOWNCONVERT, SX_BLEND_OPT_EPSILON, SX_BLEND_OPT_CONTROL */ 269 radeon_opt_set_context_reg3(sctx, R_028754_SX_PS_DOWNCONVERT, SI_TRACKED_SX_PS_DOWNCONVERT, 270 sx_ps_downconvert, sx_blend_opt_epsilon, sx_blend_opt_control); 271 } 272 radeon_end_update_context_roll(sctx); 273} 274 275/* 276 * Blender functions 277 */ 278 279static uint32_t si_translate_blend_function(int blend_func) 280{ 281 switch (blend_func) { 282 case PIPE_BLEND_ADD: 283 return V_028780_COMB_DST_PLUS_SRC; 284 case PIPE_BLEND_SUBTRACT: 285 return V_028780_COMB_SRC_MINUS_DST; 286 case PIPE_BLEND_REVERSE_SUBTRACT: 287 return V_028780_COMB_DST_MINUS_SRC; 288 case PIPE_BLEND_MIN: 289 return V_028780_COMB_MIN_DST_SRC; 290 case PIPE_BLEND_MAX: 291 return V_028780_COMB_MAX_DST_SRC; 292 default: 293 PRINT_ERR("Unknown blend function %d\n", blend_func); 294 assert(0); 295 break; 296 } 297 return 0; 298} 299 300static uint32_t si_translate_blend_factor(enum amd_gfx_level gfx_level, int blend_fact) 301{ 302 switch (blend_fact) { 303 case PIPE_BLENDFACTOR_ONE: 304 return V_028780_BLEND_ONE; 305 case PIPE_BLENDFACTOR_SRC_COLOR: 306 return V_028780_BLEND_SRC_COLOR; 307 case PIPE_BLENDFACTOR_SRC_ALPHA: 308 return V_028780_BLEND_SRC_ALPHA; 309 case PIPE_BLENDFACTOR_DST_ALPHA: 310 return V_028780_BLEND_DST_ALPHA; 311 case PIPE_BLENDFACTOR_DST_COLOR: 312 return V_028780_BLEND_DST_COLOR; 313 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 314 return V_028780_BLEND_SRC_ALPHA_SATURATE; 315 case PIPE_BLENDFACTOR_CONST_COLOR: 316 return gfx_level >= GFX11 ? V_028780_BLEND_CONSTANT_COLOR_GFX11: 317 V_028780_BLEND_CONSTANT_COLOR_GFX6; 318 case PIPE_BLENDFACTOR_CONST_ALPHA: 319 return gfx_level >= GFX11 ? 
V_028780_BLEND_CONSTANT_ALPHA_GFX11 : 320 V_028780_BLEND_CONSTANT_ALPHA_GFX6; 321 case PIPE_BLENDFACTOR_ZERO: 322 return V_028780_BLEND_ZERO; 323 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 324 return V_028780_BLEND_ONE_MINUS_SRC_COLOR; 325 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 326 return V_028780_BLEND_ONE_MINUS_SRC_ALPHA; 327 case PIPE_BLENDFACTOR_INV_DST_ALPHA: 328 return V_028780_BLEND_ONE_MINUS_DST_ALPHA; 329 case PIPE_BLENDFACTOR_INV_DST_COLOR: 330 return V_028780_BLEND_ONE_MINUS_DST_COLOR; 331 case PIPE_BLENDFACTOR_INV_CONST_COLOR: 332 return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX11: 333 V_028780_BLEND_ONE_MINUS_CONSTANT_COLOR_GFX6; 334 case PIPE_BLENDFACTOR_INV_CONST_ALPHA: 335 return gfx_level >= GFX11 ? V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX11: 336 V_028780_BLEND_ONE_MINUS_CONSTANT_ALPHA_GFX6; 337 case PIPE_BLENDFACTOR_SRC1_COLOR: 338 return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_COLOR_GFX11: 339 V_028780_BLEND_SRC1_COLOR_GFX6; 340 case PIPE_BLENDFACTOR_SRC1_ALPHA: 341 return gfx_level >= GFX11 ? V_028780_BLEND_SRC1_ALPHA_GFX11: 342 V_028780_BLEND_SRC1_ALPHA_GFX6; 343 case PIPE_BLENDFACTOR_INV_SRC1_COLOR: 344 return gfx_level >= GFX11 ? V_028780_BLEND_INV_SRC1_COLOR_GFX11: 345 V_028780_BLEND_INV_SRC1_COLOR_GFX6; 346 case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: 347 return gfx_level >= GFX11 ? 
V_028780_BLEND_INV_SRC1_ALPHA_GFX11: 348 V_028780_BLEND_INV_SRC1_ALPHA_GFX6; 349 default: 350 PRINT_ERR("Bad blend factor %d not supported!\n", blend_fact); 351 assert(0); 352 break; 353 } 354 return 0; 355} 356 357static uint32_t si_translate_blend_opt_function(int blend_func) 358{ 359 switch (blend_func) { 360 case PIPE_BLEND_ADD: 361 return V_028760_OPT_COMB_ADD; 362 case PIPE_BLEND_SUBTRACT: 363 return V_028760_OPT_COMB_SUBTRACT; 364 case PIPE_BLEND_REVERSE_SUBTRACT: 365 return V_028760_OPT_COMB_REVSUBTRACT; 366 case PIPE_BLEND_MIN: 367 return V_028760_OPT_COMB_MIN; 368 case PIPE_BLEND_MAX: 369 return V_028760_OPT_COMB_MAX; 370 default: 371 return V_028760_OPT_COMB_BLEND_DISABLED; 372 } 373} 374 375static uint32_t si_translate_blend_opt_factor(int blend_fact, bool is_alpha) 376{ 377 switch (blend_fact) { 378 case PIPE_BLENDFACTOR_ZERO: 379 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_ALL; 380 case PIPE_BLENDFACTOR_ONE: 381 return V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE; 382 case PIPE_BLENDFACTOR_SRC_COLOR: 383 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0 384 : V_028760_BLEND_OPT_PRESERVE_C1_IGNORE_C0; 385 case PIPE_BLENDFACTOR_INV_SRC_COLOR: 386 return is_alpha ? V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1 387 : V_028760_BLEND_OPT_PRESERVE_C0_IGNORE_C1; 388 case PIPE_BLENDFACTOR_SRC_ALPHA: 389 return V_028760_BLEND_OPT_PRESERVE_A1_IGNORE_A0; 390 case PIPE_BLENDFACTOR_INV_SRC_ALPHA: 391 return V_028760_BLEND_OPT_PRESERVE_A0_IGNORE_A1; 392 case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: 393 return is_alpha ? 
V_028760_BLEND_OPT_PRESERVE_ALL_IGNORE_NONE 394 : V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 395 default: 396 return V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 397 } 398} 399 400static void si_blend_check_commutativity(struct si_screen *sscreen, struct si_state_blend *blend, 401 enum pipe_blend_func func, enum pipe_blendfactor src, 402 enum pipe_blendfactor dst, unsigned chanmask) 403{ 404 /* Src factor is allowed when it does not depend on Dst */ 405 static const uint32_t src_allowed = 406 (1u << PIPE_BLENDFACTOR_ONE) | (1u << PIPE_BLENDFACTOR_SRC_COLOR) | 407 (1u << PIPE_BLENDFACTOR_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE) | 408 (1u << PIPE_BLENDFACTOR_CONST_COLOR) | (1u << PIPE_BLENDFACTOR_CONST_ALPHA) | 409 (1u << PIPE_BLENDFACTOR_SRC1_COLOR) | (1u << PIPE_BLENDFACTOR_SRC1_ALPHA) | 410 (1u << PIPE_BLENDFACTOR_ZERO) | (1u << PIPE_BLENDFACTOR_INV_SRC_COLOR) | 411 (1u << PIPE_BLENDFACTOR_INV_SRC_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_CONST_COLOR) | 412 (1u << PIPE_BLENDFACTOR_INV_CONST_ALPHA) | (1u << PIPE_BLENDFACTOR_INV_SRC1_COLOR) | 413 (1u << PIPE_BLENDFACTOR_INV_SRC1_ALPHA); 414 415 if (dst == PIPE_BLENDFACTOR_ONE && (src_allowed & (1u << src)) && 416 (func == PIPE_BLEND_MAX || func == PIPE_BLEND_MIN)) 417 blend->commutative_4bit |= chanmask; 418} 419 420/** 421 * Get rid of DST in the blend factors by commuting the operands: 422 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 423 */ 424static void si_blend_remove_dst(unsigned *func, unsigned *src_factor, unsigned *dst_factor, 425 unsigned expected_dst, unsigned replacement_src) 426{ 427 if (*src_factor == expected_dst && *dst_factor == PIPE_BLENDFACTOR_ZERO) { 428 *src_factor = PIPE_BLENDFACTOR_ZERO; 429 *dst_factor = replacement_src; 430 431 /* Commuting the operands requires reversing subtractions. 
*/ 432 if (*func == PIPE_BLEND_SUBTRACT) 433 *func = PIPE_BLEND_REVERSE_SUBTRACT; 434 else if (*func == PIPE_BLEND_REVERSE_SUBTRACT) 435 *func = PIPE_BLEND_SUBTRACT; 436 } 437} 438 439static void *si_create_blend_state_mode(struct pipe_context *ctx, 440 const struct pipe_blend_state *state, unsigned mode) 441{ 442 struct si_context *sctx = (struct si_context *)ctx; 443 struct si_state_blend *blend = CALLOC_STRUCT(si_state_blend); 444 struct si_pm4_state *pm4 = &blend->pm4; 445 uint32_t sx_mrt_blend_opt[8] = {0}; 446 uint32_t color_control = 0; 447 bool logicop_enable = state->logicop_enable && state->logicop_func != PIPE_LOGICOP_COPY; 448 449 if (!blend) 450 return NULL; 451 452 blend->alpha_to_coverage = state->alpha_to_coverage; 453 blend->alpha_to_one = state->alpha_to_one; 454 blend->dual_src_blend = util_blend_state_is_dual(state, 0); 455 blend->logicop_enable = logicop_enable; 456 blend->allows_noop_optimization = 457 state->rt[0].rgb_func == PIPE_BLEND_ADD && 458 state->rt[0].alpha_func == PIPE_BLEND_ADD && 459 state->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_DST_COLOR && 460 state->rt[0].alpha_src_factor == PIPE_BLENDFACTOR_DST_COLOR && 461 state->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ZERO && 462 state->rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_ZERO && 463 mode == V_028808_CB_NORMAL; 464 465 unsigned num_shader_outputs = state->max_rt + 1; /* estimate */ 466 if (blend->dual_src_blend) 467 num_shader_outputs = MAX2(num_shader_outputs, 2); 468 469 if (logicop_enable) { 470 color_control |= S_028808_ROP3(state->logicop_func | (state->logicop_func << 4)); 471 } else { 472 color_control |= S_028808_ROP3(0xcc); 473 } 474 475 if (state->alpha_to_coverage && state->alpha_to_coverage_dither) { 476 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 477 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 478 S_028B70_ALPHA_TO_MASK_OFFSET0(3) | S_028B70_ALPHA_TO_MASK_OFFSET1(1) | 479 S_028B70_ALPHA_TO_MASK_OFFSET2(0) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 480 
S_028B70_OFFSET_ROUND(1)); 481 } else { 482 si_pm4_set_reg(pm4, R_028B70_DB_ALPHA_TO_MASK, 483 S_028B70_ALPHA_TO_MASK_ENABLE(state->alpha_to_coverage) | 484 S_028B70_ALPHA_TO_MASK_OFFSET0(2) | S_028B70_ALPHA_TO_MASK_OFFSET1(2) | 485 S_028B70_ALPHA_TO_MASK_OFFSET2(2) | S_028B70_ALPHA_TO_MASK_OFFSET3(2) | 486 S_028B70_OFFSET_ROUND(0)); 487 } 488 489 if (state->alpha_to_coverage) 490 blend->need_src_alpha_4bit |= 0xf; 491 492 blend->cb_target_mask = 0; 493 blend->cb_target_enabled_4bit = 0; 494 495 unsigned last_blend_cntl; 496 497 for (int i = 0; i < num_shader_outputs; i++) { 498 /* state->rt entries > 0 only written if independent blending */ 499 const int j = state->independent_blend_enable ? i : 0; 500 501 unsigned eqRGB = state->rt[j].rgb_func; 502 unsigned srcRGB = state->rt[j].rgb_src_factor; 503 unsigned dstRGB = state->rt[j].rgb_dst_factor; 504 unsigned eqA = state->rt[j].alpha_func; 505 unsigned srcA = state->rt[j].alpha_src_factor; 506 unsigned dstA = state->rt[j].alpha_dst_factor; 507 508 unsigned srcRGB_opt, dstRGB_opt, srcA_opt, dstA_opt; 509 unsigned blend_cntl = 0; 510 511 sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED) | 512 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_BLEND_DISABLED); 513 514 /* Only set dual source blending for MRT0 to avoid a hang. */ 515 if (i >= 1 && blend->dual_src_blend) { 516 if (i == 1) { 517 if (sctx->gfx_level >= GFX11) 518 blend_cntl = last_blend_cntl; 519 else 520 blend_cntl = S_028780_ENABLE(1); 521 } 522 523 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 524 continue; 525 } 526 527 /* Only addition and subtraction equations are supported with 528 * dual source blending. 
529 */ 530 if (blend->dual_src_blend && (eqRGB == PIPE_BLEND_MIN || eqRGB == PIPE_BLEND_MAX || 531 eqA == PIPE_BLEND_MIN || eqA == PIPE_BLEND_MAX)) { 532 assert(!"Unsupported equation for dual source blending"); 533 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 534 continue; 535 } 536 537 /* cb_render_state will disable unused ones */ 538 blend->cb_target_mask |= (unsigned)state->rt[j].colormask << (4 * i); 539 if (state->rt[j].colormask) 540 blend->cb_target_enabled_4bit |= 0xf << (4 * i); 541 542 if (!state->rt[j].colormask || !state->rt[j].blend_enable) { 543 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 544 continue; 545 } 546 547 si_blend_check_commutativity(sctx->screen, blend, eqRGB, srcRGB, dstRGB, 0x7 << (4 * i)); 548 si_blend_check_commutativity(sctx->screen, blend, eqA, srcA, dstA, 0x8 << (4 * i)); 549 550 /* Blending optimizations for RB+. 551 * These transformations don't change the behavior. 552 * 553 * First, get rid of DST in the blend factors: 554 * func(src * DST, dst * 0) ---> func(src * 0, dst * SRC) 555 */ 556 si_blend_remove_dst(&eqRGB, &srcRGB, &dstRGB, PIPE_BLENDFACTOR_DST_COLOR, 557 PIPE_BLENDFACTOR_SRC_COLOR); 558 si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_COLOR, 559 PIPE_BLENDFACTOR_SRC_COLOR); 560 si_blend_remove_dst(&eqA, &srcA, &dstA, PIPE_BLENDFACTOR_DST_ALPHA, 561 PIPE_BLENDFACTOR_SRC_ALPHA); 562 563 /* Look up the ideal settings from tables. */ 564 srcRGB_opt = si_translate_blend_opt_factor(srcRGB, false); 565 dstRGB_opt = si_translate_blend_opt_factor(dstRGB, false); 566 srcA_opt = si_translate_blend_opt_factor(srcA, true); 567 dstA_opt = si_translate_blend_opt_factor(dstA, true); 568 569 /* Handle interdependencies. 
*/ 570 if (util_blend_factor_uses_dest(srcRGB, false)) 571 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 572 if (util_blend_factor_uses_dest(srcA, false)) 573 dstA_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_NONE; 574 575 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE && 576 (dstRGB == PIPE_BLENDFACTOR_ZERO || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 577 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE)) 578 dstRGB_opt = V_028760_BLEND_OPT_PRESERVE_NONE_IGNORE_A0; 579 580 /* Set the final value. */ 581 sx_mrt_blend_opt[i] = S_028760_COLOR_SRC_OPT(srcRGB_opt) | 582 S_028760_COLOR_DST_OPT(dstRGB_opt) | 583 S_028760_COLOR_COMB_FCN(si_translate_blend_opt_function(eqRGB)) | 584 S_028760_ALPHA_SRC_OPT(srcA_opt) | S_028760_ALPHA_DST_OPT(dstA_opt) | 585 S_028760_ALPHA_COMB_FCN(si_translate_blend_opt_function(eqA)); 586 587 /* Set blend state. */ 588 blend_cntl |= S_028780_ENABLE(1); 589 blend_cntl |= S_028780_COLOR_COMB_FCN(si_translate_blend_function(eqRGB)); 590 blend_cntl |= S_028780_COLOR_SRCBLEND(si_translate_blend_factor(sctx->gfx_level, srcRGB)); 591 blend_cntl |= S_028780_COLOR_DESTBLEND(si_translate_blend_factor(sctx->gfx_level, dstRGB)); 592 593 if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB) { 594 blend_cntl |= S_028780_SEPARATE_ALPHA_BLEND(1); 595 blend_cntl |= S_028780_ALPHA_COMB_FCN(si_translate_blend_function(eqA)); 596 blend_cntl |= S_028780_ALPHA_SRCBLEND(si_translate_blend_factor(sctx->gfx_level, srcA)); 597 blend_cntl |= S_028780_ALPHA_DESTBLEND(si_translate_blend_factor(sctx->gfx_level, dstA)); 598 } 599 si_pm4_set_reg(pm4, R_028780_CB_BLEND0_CONTROL + i * 4, blend_cntl); 600 last_blend_cntl = blend_cntl; 601 602 blend->blend_enable_4bit |= 0xfu << (i * 4); 603 604 if (sctx->gfx_level >= GFX8 && sctx->gfx_level <= GFX10) 605 blend->dcc_msaa_corruption_4bit |= 0xfu << (i * 4); 606 607 /* This is only important for formats without alpha. 
*/ 608 if (srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA || 609 srcRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 610 dstRGB == PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE || 611 srcRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA || dstRGB == PIPE_BLENDFACTOR_INV_SRC_ALPHA) 612 blend->need_src_alpha_4bit |= 0xfu << (i * 4); 613 } 614 615 if (sctx->gfx_level >= GFX8 && sctx->gfx_level <= GFX10 && logicop_enable) 616 blend->dcc_msaa_corruption_4bit |= blend->cb_target_enabled_4bit; 617 618 if (blend->cb_target_mask) { 619 color_control |= S_028808_MODE(mode); 620 } else { 621 color_control |= S_028808_MODE(V_028808_CB_DISABLE); 622 } 623 624 if (sctx->screen->info.rbplus_allowed) { 625 /* Disable RB+ blend optimizations for dual source blending. 626 * Vulkan does this. 627 */ 628 if (blend->dual_src_blend) { 629 for (int i = 0; i < num_shader_outputs; i++) { 630 sx_mrt_blend_opt[i] = S_028760_COLOR_COMB_FCN(V_028760_OPT_COMB_NONE) | 631 S_028760_ALPHA_COMB_FCN(V_028760_OPT_COMB_NONE); 632 } 633 } 634 635 for (int i = 0; i < num_shader_outputs; i++) 636 si_pm4_set_reg(pm4, R_028760_SX_MRT0_BLEND_OPT + i * 4, sx_mrt_blend_opt[i]); 637 638 /* RB+ doesn't work with dual source blending, logic op, and RESOLVE. */ 639 if (blend->dual_src_blend || logicop_enable || mode == V_028808_CB_RESOLVE) 640 color_control |= S_028808_DISABLE_DUAL_QUAD(1); 641 } 642 643 si_pm4_set_reg(pm4, R_028808_CB_COLOR_CONTROL, color_control); 644 return blend; 645} 646 647static void *si_create_blend_state(struct pipe_context *ctx, const struct pipe_blend_state *state) 648{ 649 return si_create_blend_state_mode(ctx, state, V_028808_CB_NORMAL); 650} 651 652static bool si_check_blend_dst_sampler_noop(struct si_context *sctx) 653{ 654 if (sctx->framebuffer.state.nr_cbufs == 1) { 655 struct si_shader_selector *sel = sctx->shader.ps.cso; 656 657 if (unlikely(sel->info.writes_1_if_tex_is_1 == 0xff)) { 658 /* Wait for the shader to be ready. 
*/ 659 util_queue_fence_wait(&sel->ready); 660 assert(sel->nir_binary); 661 662 struct nir_shader *nir = si_deserialize_shader(sel); 663 664 /* Determine if this fragment shader always writes vec4(1) if a specific texture 665 * is all 1s. 666 */ 667 float in[4] = { 1.0, 1.0, 1.0, 1.0 }; 668 float out[4]; 669 int texunit; 670 if (si_nir_is_output_const_if_tex_is_const(nir, in, out, &texunit) && 671 !memcmp(in, out, 4 * sizeof(float))) { 672 sel->info.writes_1_if_tex_is_1 = 1 + texunit; 673 } else { 674 sel->info.writes_1_if_tex_is_1 = 0; 675 } 676 677 ralloc_free(nir); 678 } 679 680 if (sel->info.writes_1_if_tex_is_1 && 681 sel->info.writes_1_if_tex_is_1 != 0xff) { 682 /* Now check if the texture is cleared to 1 */ 683 int unit = sctx->shader.ps.cso->info.writes_1_if_tex_is_1 - 1; 684 struct si_samplers *samp = &sctx->samplers[PIPE_SHADER_FRAGMENT]; 685 if ((1u << unit) & samp->enabled_mask) { 686 struct si_texture* tex = (struct si_texture*) samp->views[unit]->texture; 687 if (tex->is_depth && 688 tex->depth_cleared_level_mask & BITFIELD_BIT(samp->views[unit]->u.tex.first_level) && 689 tex->depth_clear_value[0] == 1) { 690 return false; 691 } 692 /* TODO: handle color textures */ 693 } 694 } 695 } 696 697 return true; 698} 699 700static void si_draw_blend_dst_sampler_noop(struct pipe_context *ctx, 701 const struct pipe_draw_info *info, 702 unsigned drawid_offset, 703 const struct pipe_draw_indirect_info *indirect, 704 const struct pipe_draw_start_count_bias *draws, 705 unsigned num_draws) { 706 struct si_context *sctx = (struct si_context *)ctx; 707 708 if (!si_check_blend_dst_sampler_noop(sctx)) 709 return; 710 711 sctx->real_draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws); 712} 713 714static void si_draw_vstate_blend_dst_sampler_noop(struct pipe_context *ctx, 715 struct pipe_vertex_state *state, 716 uint32_t partial_velem_mask, 717 struct pipe_draw_vertex_state_info info, 718 const struct pipe_draw_start_count_bias *draws, 719 unsigned num_draws) { 
720 struct si_context *sctx = (struct si_context *)ctx; 721 722 if (!si_check_blend_dst_sampler_noop(sctx)) 723 return; 724 725 sctx->real_draw_vertex_state(ctx, state, partial_velem_mask, info, draws, num_draws); 726} 727 728static void si_bind_blend_state(struct pipe_context *ctx, void *state) 729{ 730 struct si_context *sctx = (struct si_context *)ctx; 731 struct si_state_blend *old_blend = sctx->queued.named.blend; 732 struct si_state_blend *blend = (struct si_state_blend *)state; 733 734 if (!blend) 735 blend = (struct si_state_blend *)sctx->noop_blend; 736 737 si_pm4_bind_state(sctx, blend, blend); 738 739 if (old_blend->cb_target_mask != blend->cb_target_mask || 740 old_blend->dual_src_blend != blend->dual_src_blend || 741 (old_blend->dcc_msaa_corruption_4bit != blend->dcc_msaa_corruption_4bit && 742 sctx->framebuffer.has_dcc_msaa)) 743 si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 744 745 if (sctx->screen->info.has_export_conflict_bug && 746 old_blend->blend_enable_4bit != blend->blend_enable_4bit) 747 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 748 749 if (old_blend->cb_target_mask != blend->cb_target_mask || 750 old_blend->alpha_to_coverage != blend->alpha_to_coverage || 751 old_blend->alpha_to_one != blend->alpha_to_one || 752 old_blend->dual_src_blend != blend->dual_src_blend || 753 old_blend->blend_enable_4bit != blend->blend_enable_4bit || 754 old_blend->need_src_alpha_4bit != blend->need_src_alpha_4bit) { 755 si_ps_key_update_framebuffer_blend(sctx); 756 si_ps_key_update_blend_rasterizer(sctx); 757 si_update_ps_inputs_read_or_disabled(sctx); 758 sctx->do_update_shaders = true; 759 } 760 761 if (sctx->screen->dpbb_allowed && 762 (old_blend->alpha_to_coverage != blend->alpha_to_coverage || 763 old_blend->blend_enable_4bit != blend->blend_enable_4bit || 764 old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit)) 765 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 766 767 if (sctx->screen->has_out_of_order_rast 
&& 768 ((old_blend->blend_enable_4bit != blend->blend_enable_4bit || 769 old_blend->cb_target_enabled_4bit != blend->cb_target_enabled_4bit || 770 old_blend->commutative_4bit != blend->commutative_4bit || 771 old_blend->logicop_enable != blend->logicop_enable))) 772 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 773 774 if (likely(!radeon_uses_secure_bos(sctx->ws))) { 775 if (unlikely(blend->allows_noop_optimization)) { 776 si_install_draw_wrapper(sctx, si_draw_blend_dst_sampler_noop, 777 si_draw_vstate_blend_dst_sampler_noop); 778 } else { 779 si_install_draw_wrapper(sctx, NULL, NULL); 780 } 781 } 782} 783 784static void si_delete_blend_state(struct pipe_context *ctx, void *state) 785{ 786 struct si_context *sctx = (struct si_context *)ctx; 787 788 if (sctx->queued.named.blend == state) 789 si_bind_blend_state(ctx, sctx->noop_blend); 790 791 si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(blend)); 792} 793 794static void si_set_blend_color(struct pipe_context *ctx, const struct pipe_blend_color *state) 795{ 796 struct si_context *sctx = (struct si_context *)ctx; 797 static const struct pipe_blend_color zeros; 798 799 sctx->blend_color = *state; 800 sctx->blend_color_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 801 si_mark_atom_dirty(sctx, &sctx->atoms.s.blend_color); 802} 803 804static void si_emit_blend_color(struct si_context *sctx) 805{ 806 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 807 808 radeon_begin(cs); 809 radeon_set_context_reg_seq(R_028414_CB_BLEND_RED, 4); 810 radeon_emit_array((uint32_t *)sctx->blend_color.color, 4); 811 radeon_end(); 812} 813 814/* 815 * Clipping 816 */ 817 818static void si_set_clip_state(struct pipe_context *ctx, const struct pipe_clip_state *state) 819{ 820 struct si_context *sctx = (struct si_context *)ctx; 821 struct pipe_constant_buffer cb; 822 static const struct pipe_clip_state zeros; 823 824 if (memcmp(&sctx->clip_state, state, sizeof(*state)) == 0) 825 return; 826 827 sctx->clip_state = 
*state; 828 sctx->clip_state_any_nonzeros = memcmp(state, &zeros, sizeof(*state)) != 0; 829 si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_state); 830 831 cb.buffer = NULL; 832 cb.user_buffer = state->ucp; 833 cb.buffer_offset = 0; 834 cb.buffer_size = 4 * 4 * 8; 835 si_set_internal_const_buffer(sctx, SI_VS_CONST_CLIP_PLANES, &cb); 836} 837 838static void si_emit_clip_state(struct si_context *sctx) 839{ 840 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 841 842 radeon_begin(cs); 843 radeon_set_context_reg_seq(R_0285BC_PA_CL_UCP_0_X, 6 * 4); 844 radeon_emit_array((uint32_t *)sctx->clip_state.ucp, 6 * 4); 845 radeon_end(); 846} 847 848static void si_emit_clip_regs(struct si_context *sctx) 849{ 850 struct si_shader *vs = si_get_vs(sctx)->current; 851 struct si_shader_selector *vs_sel = vs->selector; 852 struct si_shader_info *info = &vs_sel->info; 853 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 854 bool window_space = vs_sel->stage == MESA_SHADER_VERTEX ? 855 info->base.vs.window_space_position : 0; 856 unsigned clipdist_mask = vs_sel->info.clipdist_mask; 857 unsigned ucp_mask = clipdist_mask ? 0 : rs->clip_plane_enable & SI_USER_CLIP_PLANE_MASK; 858 unsigned culldist_mask = vs_sel->info.culldist_mask; 859 860 /* Clip distances on points have no effect, so need to be implemented 861 * as cull distances. This applies for the clipvertex case as well. 862 * 863 * Setting this for primitives other than points should have no adverse 864 * effects. 
865 */ 866 clipdist_mask &= rs->clip_plane_enable; 867 culldist_mask |= clipdist_mask; 868 869 unsigned pa_cl_cntl = S_02881C_BYPASS_VTX_RATE_COMBINER(sctx->gfx_level >= GFX10_3 && 870 !sctx->screen->options.vrs2x2) | 871 S_02881C_BYPASS_PRIM_RATE_COMBINER(sctx->gfx_level >= GFX10_3) | 872 clipdist_mask | (culldist_mask << 8); 873 874 radeon_begin(&sctx->gfx_cs); 875 radeon_opt_set_context_reg(sctx, R_02881C_PA_CL_VS_OUT_CNTL, SI_TRACKED_PA_CL_VS_OUT_CNTL, 876 pa_cl_cntl | vs->pa_cl_vs_out_cntl); 877 radeon_opt_set_context_reg(sctx, R_028810_PA_CL_CLIP_CNTL, SI_TRACKED_PA_CL_CLIP_CNTL, 878 rs->pa_cl_clip_cntl | ucp_mask | S_028810_CLIP_DISABLE(window_space)); 879 radeon_end_update_context_roll(sctx); 880} 881 882/* 883 * inferred state between framebuffer and rasterizer 884 */ 885static void si_update_poly_offset_state(struct si_context *sctx) 886{ 887 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 888 889 if (!rs->uses_poly_offset || !sctx->framebuffer.state.zsbuf) { 890 si_pm4_bind_state(sctx, poly_offset, NULL); 891 return; 892 } 893 894 /* Use the user format, not db_render_format, so that the polygon 895 * offset behaves as expected by applications. 
896 */ 897 switch (sctx->framebuffer.state.zsbuf->texture->format) { 898 case PIPE_FORMAT_Z16_UNORM: 899 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[0]); 900 break; 901 default: /* 24-bit */ 902 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[1]); 903 break; 904 case PIPE_FORMAT_Z32_FLOAT: 905 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 906 si_pm4_bind_state(sctx, poly_offset, &rs->pm4_poly_offset[2]); 907 break; 908 } 909} 910 911/* 912 * Rasterizer 913 */ 914 915static uint32_t si_translate_fill(uint32_t func) 916{ 917 switch (func) { 918 case PIPE_POLYGON_MODE_FILL: 919 return V_028814_X_DRAW_TRIANGLES; 920 case PIPE_POLYGON_MODE_LINE: 921 return V_028814_X_DRAW_LINES; 922 case PIPE_POLYGON_MODE_POINT: 923 return V_028814_X_DRAW_POINTS; 924 default: 925 assert(0); 926 return V_028814_X_DRAW_POINTS; 927 } 928} 929 930static void *si_create_rs_state(struct pipe_context *ctx, const struct pipe_rasterizer_state *state) 931{ 932 struct si_screen *sscreen = ((struct si_context *)ctx)->screen; 933 struct si_state_rasterizer *rs = CALLOC_STRUCT(si_state_rasterizer); 934 struct si_pm4_state *pm4 = &rs->pm4; 935 unsigned tmp, i; 936 float psize_min, psize_max; 937 938 if (!rs) { 939 return NULL; 940 } 941 942 rs->scissor_enable = state->scissor; 943 rs->clip_halfz = state->clip_halfz; 944 rs->two_side = state->light_twoside; 945 rs->multisample_enable = state->multisample; 946 rs->force_persample_interp = state->force_persample_interp; 947 rs->clip_plane_enable = state->clip_plane_enable; 948 rs->half_pixel_center = state->half_pixel_center; 949 rs->line_stipple_enable = state->line_stipple_enable; 950 rs->poly_stipple_enable = state->poly_stipple_enable; 951 rs->line_smooth = state->line_smooth; 952 rs->line_width = state->line_width; 953 rs->poly_smooth = state->poly_smooth; 954 rs->point_smooth = state->point_smooth; 955 rs->uses_poly_offset = state->offset_point || state->offset_line || state->offset_tri; 956 rs->clamp_fragment_color = 
state->clamp_fragment_color; 957 rs->clamp_vertex_color = state->clamp_vertex_color; 958 rs->flatshade = state->flatshade; 959 rs->flatshade_first = state->flatshade_first; 960 rs->sprite_coord_enable = state->sprite_coord_enable; 961 rs->rasterizer_discard = state->rasterizer_discard; 962 rs->polygon_mode_is_lines = 963 (state->fill_front == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_FRONT)) || 964 (state->fill_back == PIPE_POLYGON_MODE_LINE && !(state->cull_face & PIPE_FACE_BACK)); 965 rs->polygon_mode_is_points = 966 (state->fill_front == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_FRONT)) || 967 (state->fill_back == PIPE_POLYGON_MODE_POINT && !(state->cull_face & PIPE_FACE_BACK)); 968 rs->pa_sc_line_stipple = state->line_stipple_enable 969 ? S_028A0C_LINE_PATTERN(state->line_stipple_pattern) | 970 S_028A0C_REPEAT_COUNT(state->line_stipple_factor) 971 : 0; 972 /* TODO: implement line stippling with perpendicular end caps. */ 973 /* Line width > 2 is an internal recommendation. */ 974 rs->perpendicular_end_caps = state->multisample && 975 state->line_width > 2 && !state->line_stipple_enable; 976 977 rs->pa_cl_clip_cntl = S_028810_DX_CLIP_SPACE_DEF(state->clip_halfz) | 978 S_028810_ZCLIP_NEAR_DISABLE(!state->depth_clip_near) | 979 S_028810_ZCLIP_FAR_DISABLE(!state->depth_clip_far) | 980 S_028810_DX_RASTERIZATION_KILL(state->rasterizer_discard) | 981 S_028810_DX_LINEAR_ATTR_CLIP_ENA(1); 982 983 rs->ngg_cull_flags_tris = SI_NGG_CULL_TRIANGLES | 984 SI_NGG_CULL_CLIP_PLANE_ENABLE(state->clip_plane_enable); 985 rs->ngg_cull_flags_tris_y_inverted = rs->ngg_cull_flags_tris; 986 987 rs->ngg_cull_flags_lines = SI_NGG_CULL_LINES | 988 (!rs->perpendicular_end_caps ? 
SI_NGG_CULL_SMALL_LINES_DIAMOND_EXIT : 0) | 989 SI_NGG_CULL_CLIP_PLANE_ENABLE(state->clip_plane_enable); 990 991 if (rs->rasterizer_discard) { 992 rs->ngg_cull_flags_tris |= SI_NGG_CULL_FRONT_FACE | 993 SI_NGG_CULL_BACK_FACE; 994 rs->ngg_cull_flags_tris_y_inverted = rs->ngg_cull_flags_tris; 995 } else { 996 bool cull_front, cull_back; 997 998 if (!state->front_ccw) { 999 cull_front = !!(state->cull_face & PIPE_FACE_FRONT); 1000 cull_back = !!(state->cull_face & PIPE_FACE_BACK); 1001 } else { 1002 cull_back = !!(state->cull_face & PIPE_FACE_FRONT); 1003 cull_front = !!(state->cull_face & PIPE_FACE_BACK); 1004 } 1005 1006 if (cull_front) { 1007 rs->ngg_cull_flags_tris |= SI_NGG_CULL_FRONT_FACE; 1008 rs->ngg_cull_flags_tris_y_inverted |= SI_NGG_CULL_BACK_FACE; 1009 } 1010 1011 if (cull_back) { 1012 rs->ngg_cull_flags_tris |= SI_NGG_CULL_BACK_FACE; 1013 rs->ngg_cull_flags_tris_y_inverted |= SI_NGG_CULL_FRONT_FACE; 1014 } 1015 } 1016 1017 si_pm4_set_reg( 1018 pm4, R_0286D4_SPI_INTERP_CONTROL_0, 1019 S_0286D4_FLAT_SHADE_ENA(1) | S_0286D4_PNT_SPRITE_ENA(state->point_quad_rasterization) | 1020 S_0286D4_PNT_SPRITE_OVRD_X(V_0286D4_SPI_PNT_SPRITE_SEL_S) | 1021 S_0286D4_PNT_SPRITE_OVRD_Y(V_0286D4_SPI_PNT_SPRITE_SEL_T) | 1022 S_0286D4_PNT_SPRITE_OVRD_Z(V_0286D4_SPI_PNT_SPRITE_SEL_0) | 1023 S_0286D4_PNT_SPRITE_OVRD_W(V_0286D4_SPI_PNT_SPRITE_SEL_1) | 1024 S_0286D4_PNT_SPRITE_TOP_1(state->sprite_coord_mode != PIPE_SPRITE_COORD_UPPER_LEFT)); 1025 1026 /* point size 12.4 fixed point */ 1027 tmp = (unsigned)(state->point_size * 8.0); 1028 si_pm4_set_reg(pm4, R_028A00_PA_SU_POINT_SIZE, S_028A00_HEIGHT(tmp) | S_028A00_WIDTH(tmp)); 1029 1030 if (state->point_size_per_vertex) { 1031 psize_min = util_get_min_point_size(state); 1032 psize_max = SI_MAX_POINT_SIZE; 1033 } else { 1034 /* Force the point size to be as if the vertex output was disabled. 
*/ 1035 psize_min = state->point_size; 1036 psize_max = state->point_size; 1037 } 1038 rs->max_point_size = psize_max; 1039 1040 /* Divide by two, because 0.5 = 1 pixel. */ 1041 si_pm4_set_reg(pm4, R_028A04_PA_SU_POINT_MINMAX, 1042 S_028A04_MIN_SIZE(si_pack_float_12p4(psize_min / 2)) | 1043 S_028A04_MAX_SIZE(si_pack_float_12p4(psize_max / 2))); 1044 1045 si_pm4_set_reg(pm4, R_028A08_PA_SU_LINE_CNTL, 1046 S_028A08_WIDTH(si_pack_float_12p4(state->line_width / 2))); 1047 si_pm4_set_reg( 1048 pm4, R_028A48_PA_SC_MODE_CNTL_0, 1049 S_028A48_LINE_STIPPLE_ENABLE(state->line_stipple_enable) | 1050 S_028A48_MSAA_ENABLE(state->multisample || state->poly_smooth || state->line_smooth) | 1051 S_028A48_VPORT_SCISSOR_ENABLE(1) | 1052 S_028A48_ALTERNATE_RBS_PER_TILE(sscreen->info.gfx_level >= GFX9)); 1053 1054 bool polygon_mode_enabled = 1055 (state->fill_front != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_FRONT)) || 1056 (state->fill_back != PIPE_POLYGON_MODE_FILL && !(state->cull_face & PIPE_FACE_BACK)); 1057 1058 si_pm4_set_reg(pm4, R_028814_PA_SU_SC_MODE_CNTL, 1059 S_028814_PROVOKING_VTX_LAST(!state->flatshade_first) | 1060 S_028814_CULL_FRONT((state->cull_face & PIPE_FACE_FRONT) ? 1 : 0) | 1061 S_028814_CULL_BACK((state->cull_face & PIPE_FACE_BACK) ? 1 : 0) | 1062 S_028814_FACE(!state->front_ccw) | 1063 S_028814_POLY_OFFSET_FRONT_ENABLE(util_get_offset(state, state->fill_front)) | 1064 S_028814_POLY_OFFSET_BACK_ENABLE(util_get_offset(state, state->fill_back)) | 1065 S_028814_POLY_OFFSET_PARA_ENABLE(state->offset_point || state->offset_line) | 1066 S_028814_POLY_MODE(polygon_mode_enabled) | 1067 S_028814_POLYMODE_FRONT_PTYPE(si_translate_fill(state->fill_front)) | 1068 S_028814_POLYMODE_BACK_PTYPE(si_translate_fill(state->fill_back)) | 1069 /* this must be set if POLY_MODE or PERPENDICULAR_ENDCAP_ENA is set */ 1070 S_028814_KEEP_TOGETHER_ENABLE(sscreen->info.gfx_level >= GFX10 ? 
1071 polygon_mode_enabled || 1072 rs->perpendicular_end_caps : 0)); 1073 1074 if (state->bottom_edge_rule) { 1075 /* OpenGL windows should set this. */ 1076 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 1077 S_028230_ER_TRI(0xA) | 1078 S_028230_ER_POINT(0x5) | 1079 S_028230_ER_RECT(0x9) | 1080 S_028230_ER_LINE_LR(0x29) | 1081 S_028230_ER_LINE_RL(0x29) | 1082 S_028230_ER_LINE_TB(0xA) | 1083 S_028230_ER_LINE_BT(0xA)); 1084 } else { 1085 /* OpenGL FBOs and Direct3D should set this. */ 1086 si_pm4_set_reg(pm4, R_028230_PA_SC_EDGERULE, 1087 S_028230_ER_TRI(0xA) | 1088 S_028230_ER_POINT(0xA) | 1089 S_028230_ER_RECT(0xA) | 1090 S_028230_ER_LINE_LR(0x1A) | 1091 S_028230_ER_LINE_RL(0x26) | 1092 S_028230_ER_LINE_TB(0xA) | 1093 S_028230_ER_LINE_BT(0xA)); 1094 } 1095 1096 if (!rs->uses_poly_offset) 1097 return rs; 1098 1099 rs->pm4_poly_offset = CALLOC(3, sizeof(struct si_pm4_state)); 1100 if (!rs->pm4_poly_offset) { 1101 FREE(rs); 1102 return NULL; 1103 } 1104 1105 /* Precalculate polygon offset states for 16-bit, 24-bit, and 32-bit zbuffers. 
*/ 1106 for (i = 0; i < 3; i++) { 1107 struct si_pm4_state *pm4 = &rs->pm4_poly_offset[i]; 1108 float offset_units = state->offset_units; 1109 float offset_scale = state->offset_scale * 16.0f; 1110 uint32_t pa_su_poly_offset_db_fmt_cntl = 0; 1111 1112 if (!state->offset_units_unscaled) { 1113 switch (i) { 1114 case 0: /* 16-bit zbuffer */ 1115 offset_units *= 4.0f; 1116 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16); 1117 break; 1118 case 1: /* 24-bit zbuffer */ 1119 offset_units *= 2.0f; 1120 pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24); 1121 break; 1122 case 2: /* 32-bit zbuffer */ 1123 offset_units *= 1.0f; 1124 pa_su_poly_offset_db_fmt_cntl = 1125 S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1); 1126 break; 1127 } 1128 } 1129 1130 si_pm4_set_reg(pm4, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL, pa_su_poly_offset_db_fmt_cntl); 1131 si_pm4_set_reg(pm4, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, fui(state->offset_clamp)); 1132 si_pm4_set_reg(pm4, R_028B80_PA_SU_POLY_OFFSET_FRONT_SCALE, fui(offset_scale)); 1133 si_pm4_set_reg(pm4, R_028B84_PA_SU_POLY_OFFSET_FRONT_OFFSET, fui(offset_units)); 1134 si_pm4_set_reg(pm4, R_028B88_PA_SU_POLY_OFFSET_BACK_SCALE, fui(offset_scale)); 1135 si_pm4_set_reg(pm4, R_028B8C_PA_SU_POLY_OFFSET_BACK_OFFSET, fui(offset_units)); 1136 } 1137 1138 return rs; 1139} 1140 1141static void si_bind_rs_state(struct pipe_context *ctx, void *state) 1142{ 1143 struct si_context *sctx = (struct si_context *)ctx; 1144 struct si_state_rasterizer *old_rs = (struct si_state_rasterizer *)sctx->queued.named.rasterizer; 1145 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1146 1147 if (!rs) 1148 rs = (struct si_state_rasterizer *)sctx->discard_rasterizer_state; 1149 1150 if (old_rs->multisample_enable != rs->multisample_enable) { 1151 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1152 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1153 1154 
/* Update the small primitive filter workaround if necessary. */ 1155 if (sctx->screen->info.has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1) 1156 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 1157 1158 /* NGG cull state uses multisample_enable. */ 1159 if (sctx->screen->use_ngg_culling) 1160 si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 1161 } 1162 1163 if (old_rs->perpendicular_end_caps != rs->perpendicular_end_caps) 1164 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1165 1166 if (sctx->screen->use_ngg_culling && 1167 (old_rs->half_pixel_center != rs->half_pixel_center || 1168 old_rs->line_width != rs->line_width)) 1169 si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 1170 1171 SET_FIELD(sctx->current_vs_state, VS_STATE_CLAMP_VERTEX_COLOR, rs->clamp_vertex_color); 1172 1173 si_pm4_bind_state(sctx, rasterizer, rs); 1174 si_update_poly_offset_state(sctx); 1175 1176 if (old_rs->scissor_enable != rs->scissor_enable) 1177 si_mark_atom_dirty(sctx, &sctx->atoms.s.scissors); 1178 1179 if (old_rs->line_width != rs->line_width || old_rs->max_point_size != rs->max_point_size || 1180 old_rs->half_pixel_center != rs->half_pixel_center) 1181 si_mark_atom_dirty(sctx, &sctx->atoms.s.guardband); 1182 1183 if (old_rs->clip_halfz != rs->clip_halfz) 1184 si_mark_atom_dirty(sctx, &sctx->atoms.s.viewports); 1185 1186 if (old_rs->clip_plane_enable != rs->clip_plane_enable || 1187 old_rs->pa_cl_clip_cntl != rs->pa_cl_clip_cntl) 1188 si_mark_atom_dirty(sctx, &sctx->atoms.s.clip_regs); 1189 1190 if (old_rs->sprite_coord_enable != rs->sprite_coord_enable || 1191 old_rs->flatshade != rs->flatshade) 1192 si_mark_atom_dirty(sctx, &sctx->atoms.s.spi_map); 1193 1194 if (old_rs->clip_plane_enable != rs->clip_plane_enable || 1195 old_rs->rasterizer_discard != rs->rasterizer_discard || 1196 old_rs->sprite_coord_enable != rs->sprite_coord_enable || 1197 old_rs->flatshade != rs->flatshade || old_rs->two_side != rs->two_side || 1198 
old_rs->multisample_enable != rs->multisample_enable || 1199 old_rs->poly_stipple_enable != rs->poly_stipple_enable || 1200 old_rs->poly_smooth != rs->poly_smooth || old_rs->line_smooth != rs->line_smooth || 1201 old_rs->point_smooth != rs->point_smooth || 1202 old_rs->clamp_fragment_color != rs->clamp_fragment_color || 1203 old_rs->force_persample_interp != rs->force_persample_interp || 1204 old_rs->polygon_mode_is_points != rs->polygon_mode_is_points) { 1205 si_ps_key_update_blend_rasterizer(sctx); 1206 si_ps_key_update_rasterizer(sctx); 1207 si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 1208 si_update_ps_inputs_read_or_disabled(sctx); 1209 sctx->do_update_shaders = true; 1210 } 1211 1212 if (old_rs->line_smooth != rs->line_smooth || 1213 old_rs->poly_smooth != rs->poly_smooth || 1214 old_rs->point_smooth != rs->point_smooth || 1215 old_rs->poly_stipple_enable != rs->poly_stipple_enable || 1216 old_rs->flatshade != rs->flatshade) 1217 si_update_vrs_flat_shading(sctx); 1218} 1219 1220static void si_delete_rs_state(struct pipe_context *ctx, void *state) 1221{ 1222 struct si_context *sctx = (struct si_context *)ctx; 1223 struct si_state_rasterizer *rs = (struct si_state_rasterizer *)state; 1224 1225 if (sctx->queued.named.rasterizer == state) 1226 si_bind_rs_state(ctx, sctx->discard_rasterizer_state); 1227 1228 FREE(rs->pm4_poly_offset); 1229 si_pm4_free_state(sctx, &rs->pm4, SI_STATE_IDX(rasterizer)); 1230} 1231 1232/* 1233 * inferred state between dsa and stencil ref 1234 */ 1235static void si_emit_stencil_ref(struct si_context *sctx) 1236{ 1237 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 1238 struct pipe_stencil_ref *ref = &sctx->stencil_ref.state; 1239 struct si_dsa_stencil_ref_part *dsa = &sctx->stencil_ref.dsa_part; 1240 1241 radeon_begin(cs); 1242 radeon_set_context_reg_seq(R_028430_DB_STENCILREFMASK, 2); 1243 radeon_emit(S_028430_STENCILTESTVAL(ref->ref_value[0]) | 1244 S_028430_STENCILMASK(dsa->valuemask[0]) | 1245 
S_028430_STENCILWRITEMASK(dsa->writemask[0]) | 1246 S_028430_STENCILOPVAL(1)); 1247 radeon_emit(S_028434_STENCILTESTVAL_BF(ref->ref_value[1]) | 1248 S_028434_STENCILMASK_BF(dsa->valuemask[1]) | 1249 S_028434_STENCILWRITEMASK_BF(dsa->writemask[1]) | 1250 S_028434_STENCILOPVAL_BF(1)); 1251 radeon_end(); 1252} 1253 1254static void si_set_stencil_ref(struct pipe_context *ctx, const struct pipe_stencil_ref state) 1255{ 1256 struct si_context *sctx = (struct si_context *)ctx; 1257 1258 if (memcmp(&sctx->stencil_ref.state, &state, sizeof(state)) == 0) 1259 return; 1260 1261 sctx->stencil_ref.state = state; 1262 si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 1263} 1264 1265/* 1266 * DSA 1267 */ 1268 1269static uint32_t si_translate_stencil_op(int s_op) 1270{ 1271 switch (s_op) { 1272 case PIPE_STENCIL_OP_KEEP: 1273 return V_02842C_STENCIL_KEEP; 1274 case PIPE_STENCIL_OP_ZERO: 1275 return V_02842C_STENCIL_ZERO; 1276 case PIPE_STENCIL_OP_REPLACE: 1277 return V_02842C_STENCIL_REPLACE_TEST; 1278 case PIPE_STENCIL_OP_INCR: 1279 return V_02842C_STENCIL_ADD_CLAMP; 1280 case PIPE_STENCIL_OP_DECR: 1281 return V_02842C_STENCIL_SUB_CLAMP; 1282 case PIPE_STENCIL_OP_INCR_WRAP: 1283 return V_02842C_STENCIL_ADD_WRAP; 1284 case PIPE_STENCIL_OP_DECR_WRAP: 1285 return V_02842C_STENCIL_SUB_WRAP; 1286 case PIPE_STENCIL_OP_INVERT: 1287 return V_02842C_STENCIL_INVERT; 1288 default: 1289 PRINT_ERR("Unknown stencil op %d", s_op); 1290 assert(0); 1291 break; 1292 } 1293 return 0; 1294} 1295 1296static bool si_order_invariant_stencil_op(enum pipe_stencil_op op) 1297{ 1298 /* REPLACE is normally order invariant, except when the stencil 1299 * reference value is written by the fragment shader. Tracking this 1300 * interaction does not seem worth the effort, so be conservative. 
*/ 1301 return op != PIPE_STENCIL_OP_INCR && op != PIPE_STENCIL_OP_DECR && op != PIPE_STENCIL_OP_REPLACE; 1302} 1303 1304/* Compute whether, assuming Z writes are disabled, this stencil state is order 1305 * invariant in the sense that the set of passing fragments as well as the 1306 * final stencil buffer result does not depend on the order of fragments. */ 1307static bool si_order_invariant_stencil_state(const struct pipe_stencil_state *state) 1308{ 1309 return !state->enabled || !state->writemask || 1310 /* The following assumes that Z writes are disabled. */ 1311 (state->func == PIPE_FUNC_ALWAYS && si_order_invariant_stencil_op(state->zpass_op) && 1312 si_order_invariant_stencil_op(state->zfail_op)) || 1313 (state->func == PIPE_FUNC_NEVER && si_order_invariant_stencil_op(state->fail_op)); 1314} 1315 1316static void *si_create_dsa_state(struct pipe_context *ctx, 1317 const struct pipe_depth_stencil_alpha_state *state) 1318{ 1319 struct si_state_dsa *dsa = CALLOC_STRUCT(si_state_dsa); 1320 struct si_pm4_state *pm4 = &dsa->pm4; 1321 unsigned db_depth_control; 1322 uint32_t db_stencil_control = 0; 1323 1324 if (!dsa) { 1325 return NULL; 1326 } 1327 1328 dsa->stencil_ref.valuemask[0] = state->stencil[0].valuemask; 1329 dsa->stencil_ref.valuemask[1] = state->stencil[1].valuemask; 1330 dsa->stencil_ref.writemask[0] = state->stencil[0].writemask; 1331 dsa->stencil_ref.writemask[1] = state->stencil[1].writemask; 1332 1333 db_depth_control = 1334 S_028800_Z_ENABLE(state->depth_enabled) | S_028800_Z_WRITE_ENABLE(state->depth_writemask) | 1335 S_028800_ZFUNC(state->depth_func) | S_028800_DEPTH_BOUNDS_ENABLE(state->depth_bounds_test); 1336 1337 /* stencil */ 1338 if (state->stencil[0].enabled) { 1339 db_depth_control |= S_028800_STENCIL_ENABLE(1); 1340 db_depth_control |= S_028800_STENCILFUNC(state->stencil[0].func); 1341 db_stencil_control |= 1342 S_02842C_STENCILFAIL(si_translate_stencil_op(state->stencil[0].fail_op)); 1343 db_stencil_control |= 1344 
S_02842C_STENCILZPASS(si_translate_stencil_op(state->stencil[0].zpass_op)); 1345 db_stencil_control |= 1346 S_02842C_STENCILZFAIL(si_translate_stencil_op(state->stencil[0].zfail_op)); 1347 1348 if (state->stencil[1].enabled) { 1349 db_depth_control |= S_028800_BACKFACE_ENABLE(1); 1350 db_depth_control |= S_028800_STENCILFUNC_BF(state->stencil[1].func); 1351 db_stencil_control |= 1352 S_02842C_STENCILFAIL_BF(si_translate_stencil_op(state->stencil[1].fail_op)); 1353 db_stencil_control |= 1354 S_02842C_STENCILZPASS_BF(si_translate_stencil_op(state->stencil[1].zpass_op)); 1355 db_stencil_control |= 1356 S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(state->stencil[1].zfail_op)); 1357 } 1358 } 1359 1360 /* alpha */ 1361 if (state->alpha_enabled) { 1362 dsa->alpha_func = state->alpha_func; 1363 1364 si_pm4_set_reg(pm4, R_00B030_SPI_SHADER_USER_DATA_PS_0 + SI_SGPR_ALPHA_REF * 4, 1365 fui(state->alpha_ref_value)); 1366 } else { 1367 dsa->alpha_func = PIPE_FUNC_ALWAYS; 1368 } 1369 1370 si_pm4_set_reg(pm4, R_028800_DB_DEPTH_CONTROL, db_depth_control); 1371 if (state->stencil[0].enabled) 1372 si_pm4_set_reg(pm4, R_02842C_DB_STENCIL_CONTROL, db_stencil_control); 1373 if (state->depth_bounds_test) { 1374 si_pm4_set_reg(pm4, R_028020_DB_DEPTH_BOUNDS_MIN, fui(state->depth_bounds_min)); 1375 si_pm4_set_reg(pm4, R_028024_DB_DEPTH_BOUNDS_MAX, fui(state->depth_bounds_max)); 1376 } 1377 1378 dsa->depth_enabled = state->depth_enabled; 1379 dsa->depth_write_enabled = state->depth_enabled && state->depth_writemask; 1380 dsa->stencil_enabled = state->stencil[0].enabled; 1381 dsa->stencil_write_enabled = 1382 (util_writes_stencil(&state->stencil[0]) || util_writes_stencil(&state->stencil[1])); 1383 dsa->db_can_write = dsa->depth_write_enabled || dsa->stencil_write_enabled; 1384 1385 bool zfunc_is_ordered = 1386 state->depth_func == PIPE_FUNC_NEVER || state->depth_func == PIPE_FUNC_LESS || 1387 state->depth_func == PIPE_FUNC_LEQUAL || state->depth_func == PIPE_FUNC_GREATER || 1388 
state->depth_func == PIPE_FUNC_GEQUAL; 1389 1390 bool nozwrite_and_order_invariant_stencil = 1391 !dsa->db_can_write || 1392 (!dsa->depth_write_enabled && si_order_invariant_stencil_state(&state->stencil[0]) && 1393 si_order_invariant_stencil_state(&state->stencil[1])); 1394 1395 dsa->order_invariance[1].zs = 1396 nozwrite_and_order_invariant_stencil || (!dsa->stencil_write_enabled && zfunc_is_ordered); 1397 dsa->order_invariance[0].zs = !dsa->depth_write_enabled || zfunc_is_ordered; 1398 1399 dsa->order_invariance[1].pass_set = 1400 nozwrite_and_order_invariant_stencil || 1401 (!dsa->stencil_write_enabled && 1402 (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER)); 1403 dsa->order_invariance[0].pass_set = 1404 !dsa->depth_write_enabled || 1405 (state->depth_func == PIPE_FUNC_ALWAYS || state->depth_func == PIPE_FUNC_NEVER); 1406 1407 return dsa; 1408} 1409 1410static void si_bind_dsa_state(struct pipe_context *ctx, void *state) 1411{ 1412 struct si_context *sctx = (struct si_context *)ctx; 1413 struct si_state_dsa *old_dsa = sctx->queued.named.dsa; 1414 struct si_state_dsa *dsa = state; 1415 1416 if (!dsa) 1417 dsa = (struct si_state_dsa *)sctx->noop_dsa; 1418 1419 si_pm4_bind_state(sctx, dsa, dsa); 1420 1421 if (memcmp(&dsa->stencil_ref, &sctx->stencil_ref.dsa_part, 1422 sizeof(struct si_dsa_stencil_ref_part)) != 0) { 1423 sctx->stencil_ref.dsa_part = dsa->stencil_ref; 1424 si_mark_atom_dirty(sctx, &sctx->atoms.s.stencil_ref); 1425 } 1426 1427 if (old_dsa->alpha_func != dsa->alpha_func) { 1428 si_ps_key_update_dsa(sctx); 1429 si_update_ps_inputs_read_or_disabled(sctx); 1430 sctx->do_update_shaders = true; 1431 } 1432 1433 if (sctx->screen->dpbb_allowed && ((old_dsa->depth_enabled != dsa->depth_enabled || 1434 old_dsa->stencil_enabled != dsa->stencil_enabled || 1435 old_dsa->db_can_write != dsa->db_can_write))) 1436 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 1437 1438 if (sctx->screen->has_out_of_order_rast && 1439 
(memcmp(old_dsa->order_invariance, dsa->order_invariance, 1440 sizeof(old_dsa->order_invariance)))) 1441 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1442} 1443 1444static void si_delete_dsa_state(struct pipe_context *ctx, void *state) 1445{ 1446 struct si_context *sctx = (struct si_context *)ctx; 1447 1448 if (sctx->queued.named.dsa == state) 1449 si_bind_dsa_state(ctx, sctx->noop_dsa); 1450 1451 si_pm4_free_state(sctx, (struct si_pm4_state*)state, SI_STATE_IDX(dsa)); 1452} 1453 1454static void *si_create_db_flush_dsa(struct si_context *sctx) 1455{ 1456 struct pipe_depth_stencil_alpha_state dsa = {}; 1457 1458 return sctx->b.create_depth_stencil_alpha_state(&sctx->b, &dsa); 1459} 1460 1461/* DB RENDER STATE */ 1462 1463static void si_set_active_query_state(struct pipe_context *ctx, bool enable) 1464{ 1465 struct si_context *sctx = (struct si_context *)ctx; 1466 1467 /* Pipeline stat & streamout queries. */ 1468 if (enable) { 1469 sctx->flags &= ~SI_CONTEXT_STOP_PIPELINE_STATS; 1470 sctx->flags |= SI_CONTEXT_START_PIPELINE_STATS; 1471 } else { 1472 sctx->flags &= ~SI_CONTEXT_START_PIPELINE_STATS; 1473 sctx->flags |= SI_CONTEXT_STOP_PIPELINE_STATS; 1474 } 1475 1476 /* Occlusion queries. 
*/ 1477 if (sctx->occlusion_queries_disabled != !enable) { 1478 sctx->occlusion_queries_disabled = !enable; 1479 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1480 } 1481} 1482 1483void si_set_occlusion_query_state(struct si_context *sctx, bool old_perfect_enable) 1484{ 1485 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1486 1487 bool perfect_enable = sctx->num_perfect_occlusion_queries != 0; 1488 1489 if (perfect_enable != old_perfect_enable) 1490 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 1491} 1492 1493void si_save_qbo_state(struct si_context *sctx, struct si_qbo_state *st) 1494{ 1495 si_get_pipe_constant_buffer(sctx, PIPE_SHADER_COMPUTE, 0, &st->saved_const0); 1496} 1497 1498void si_restore_qbo_state(struct si_context *sctx, struct si_qbo_state *st) 1499{ 1500 sctx->b.set_constant_buffer(&sctx->b, PIPE_SHADER_COMPUTE, 0, true, &st->saved_const0); 1501} 1502 1503static void si_emit_db_render_state(struct si_context *sctx) 1504{ 1505 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 1506 unsigned db_shader_control, db_render_control, db_count_control; 1507 1508 /* DB_RENDER_CONTROL */ 1509 if (sctx->dbcb_depth_copy_enabled || sctx->dbcb_stencil_copy_enabled) { 1510 db_render_control = S_028000_DEPTH_COPY(sctx->dbcb_depth_copy_enabled) | 1511 S_028000_STENCIL_COPY(sctx->dbcb_stencil_copy_enabled) | 1512 S_028000_COPY_CENTROID(1) | S_028000_COPY_SAMPLE(sctx->dbcb_copy_sample); 1513 } else if (sctx->db_flush_depth_inplace || sctx->db_flush_stencil_inplace) { 1514 db_render_control = S_028000_DEPTH_COMPRESS_DISABLE(sctx->db_flush_depth_inplace) | 1515 S_028000_STENCIL_COMPRESS_DISABLE(sctx->db_flush_stencil_inplace); 1516 } else { 1517 db_render_control = S_028000_DEPTH_CLEAR_ENABLE(sctx->db_depth_clear) | 1518 S_028000_STENCIL_CLEAR_ENABLE(sctx->db_stencil_clear); 1519 } 1520 1521 if (sctx->gfx_level >= GFX11) { 1522 unsigned max_allowed_tiles_in_wave = 0; 1523 1524 if (sctx->screen->info.has_dedicated_vram) { 1525 if 
(sctx->framebuffer.nr_samples == 8) 1526 max_allowed_tiles_in_wave = 7; 1527 else if (sctx->framebuffer.nr_samples == 4) 1528 max_allowed_tiles_in_wave = 14; 1529 } else { 1530 if (sctx->framebuffer.nr_samples == 8) 1531 max_allowed_tiles_in_wave = 8; 1532 } 1533 1534 /* TODO: We may want to disable this workaround for future chips. */ 1535 if (sctx->framebuffer.nr_samples >= 4) { 1536 if (max_allowed_tiles_in_wave) 1537 max_allowed_tiles_in_wave--; 1538 else 1539 max_allowed_tiles_in_wave = 15; 1540 } 1541 1542 db_render_control |= S_028000_OREO_MODE(V_028000_OMODE_O_THEN_B) | 1543 S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave); 1544 } 1545 1546 /* DB_COUNT_CONTROL (occlusion queries) */ 1547 if (sctx->num_occlusion_queries > 0 && !sctx->occlusion_queries_disabled) { 1548 bool perfect = sctx->num_perfect_occlusion_queries > 0; 1549 bool gfx10_perfect = sctx->gfx_level >= GFX10 && perfect; 1550 1551 if (sctx->gfx_level >= GFX7) { 1552 unsigned log_sample_rate = sctx->framebuffer.log_samples; 1553 1554 db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1555 S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) | 1556 S_028004_SAMPLE_RATE(log_sample_rate) | S_028004_ZPASS_ENABLE(1) | 1557 S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1); 1558 } else { 1559 db_count_control = S_028004_PERFECT_ZPASS_COUNTS(perfect) | 1560 S_028004_SAMPLE_RATE(sctx->framebuffer.log_samples); 1561 } 1562 } else { 1563 /* Disable occlusion queries. 
*/ 1564 if (sctx->gfx_level >= GFX7) { 1565 db_count_control = 0; 1566 } else { 1567 db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1); 1568 } 1569 } 1570 1571 radeon_begin(&sctx->gfx_cs); 1572 radeon_opt_set_context_reg2(sctx, R_028000_DB_RENDER_CONTROL, SI_TRACKED_DB_RENDER_CONTROL, 1573 db_render_control, db_count_control); 1574 1575 /* DB_RENDER_OVERRIDE2 */ 1576 radeon_opt_set_context_reg( 1577 sctx, R_028010_DB_RENDER_OVERRIDE2, SI_TRACKED_DB_RENDER_OVERRIDE2, 1578 S_028010_DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION(sctx->db_depth_disable_expclear) | 1579 S_028010_DISABLE_SMEM_EXPCLEAR_OPTIMIZATION(sctx->db_stencil_disable_expclear) | 1580 S_028010_DECOMPRESS_Z_ON_FLUSH(sctx->framebuffer.nr_samples >= 4) | 1581 S_028010_CENTROID_COMPUTATION_MODE(sctx->gfx_level >= GFX10_3 ? 1 : 0)); 1582 1583 db_shader_control = sctx->ps_db_shader_control; 1584 1585 /* Bug workaround for smoothing (overrasterization) on GFX6. */ 1586 if (sctx->gfx_level == GFX6 && sctx->smoothing_enabled) { 1587 db_shader_control &= C_02880C_Z_ORDER; 1588 db_shader_control |= S_02880C_Z_ORDER(V_02880C_LATE_Z); 1589 } 1590 1591 /* Disable the gl_SampleMask fragment shader output if MSAA is disabled. 
*/ 1592 if (!rs->multisample_enable) 1593 db_shader_control &= C_02880C_MASK_EXPORT_ENABLE; 1594 1595 if (sctx->screen->info.has_rbplus && !sctx->screen->info.rbplus_allowed) 1596 db_shader_control |= S_02880C_DUAL_QUAD_DISABLE(1); 1597 1598 if (sctx->screen->info.has_export_conflict_bug && 1599 sctx->queued.named.blend->blend_enable_4bit && 1600 si_get_num_coverage_samples(sctx) == 1) { 1601 db_shader_control |= S_02880C_OVERRIDE_INTRINSIC_RATE_ENABLE(1) | 1602 S_02880C_OVERRIDE_INTRINSIC_RATE(2); 1603 } 1604 1605 radeon_opt_set_context_reg(sctx, R_02880C_DB_SHADER_CONTROL, SI_TRACKED_DB_SHADER_CONTROL, 1606 db_shader_control); 1607 1608 if (sctx->gfx_level >= GFX10_3) { 1609 if (sctx->allow_flat_shading) { 1610 if (sctx->gfx_level == GFX11) { 1611 radeon_opt_set_context_reg(sctx, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, 1612 SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, 1613 S_0283D0_VRS_OVERRIDE_RATE_COMBINER_MODE( 1614 V_0283D0_SC_VRS_COMB_MODE_OVERRIDE) | 1615 /* If the hw doesn't support VRS 4x4, it will silently 1616 * use 2x2 instead. */ 1617 S_0283D0_VRS_RATE(V_0283D0_VRS_SHADING_RATE_4X4)); 1618 } else { 1619 radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL, 1620 SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, 1621 S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE( 1622 V_028064_VRS_COMB_MODE_OVERRIDE) | 1623 S_028064_VRS_OVERRIDE_RATE_X(1) | 1624 S_028064_VRS_OVERRIDE_RATE_Y(1)); 1625 } 1626 } else { 1627 /* If the shader is using discard, turn off coarse shading because 1628 * discard at 2x2 pixel granularity degrades quality too much. 1629 * 1630 * MIN allows sample shading but not coarse shading. 1631 */ 1632 if (sctx->gfx_level == GFX11) { 1633 unsigned mode = sctx->screen->options.vrs2x2 && G_02880C_KILL_ENABLE(db_shader_control) ? 
1634 V_0283D0_SC_VRS_COMB_MODE_MIN : V_0283D0_SC_VRS_COMB_MODE_PASSTHRU; 1635 1636 radeon_opt_set_context_reg(sctx, R_0283D0_PA_SC_VRS_OVERRIDE_CNTL, 1637 SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, 1638 S_0283D0_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | 1639 S_0283D0_VRS_RATE(V_0283D0_VRS_SHADING_RATE_1X1)); 1640 } else { 1641 unsigned mode = sctx->screen->options.vrs2x2 && G_02880C_KILL_ENABLE(db_shader_control) ? 1642 V_028064_VRS_COMB_MODE_MIN : V_028064_VRS_COMB_MODE_PASSTHRU; 1643 1644 radeon_opt_set_context_reg(sctx, R_028064_DB_VRS_OVERRIDE_CNTL, 1645 SI_TRACKED_DB_PA_SC_VRS_OVERRIDE_CNTL, 1646 S_028064_VRS_OVERRIDE_RATE_COMBINER_MODE(mode) | 1647 S_028064_VRS_OVERRIDE_RATE_X(0) | 1648 S_028064_VRS_OVERRIDE_RATE_Y(0)); 1649 } 1650 } 1651 } 1652 radeon_end_update_context_roll(sctx); 1653} 1654 1655/* 1656 * format translation 1657 */ 1658uint32_t si_translate_colorformat(enum amd_gfx_level gfx_level, 1659 enum pipe_format format) 1660{ 1661 const struct util_format_description *desc = util_format_description(format); 1662 1663#define HAS_SIZE(x, y, z, w) \ 1664 (desc->channel[0].size == (x) && desc->channel[1].size == (y) && \ 1665 desc->channel[2].size == (z) && desc->channel[3].size == (w)) 1666 1667 if (format == PIPE_FORMAT_R11G11B10_FLOAT) /* isn't plain */ 1668 return V_028C70_COLOR_10_11_11; 1669 1670 if (gfx_level >= GFX10_3 && 1671 format == PIPE_FORMAT_R9G9B9E5_FLOAT) /* isn't plain */ 1672 return V_028C70_COLOR_5_9_9_9; 1673 1674 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 1675 return V_028C70_COLOR_INVALID; 1676 1677 /* hw cannot support mixed formats (except depth/stencil, since 1678 * stencil is not written to). */ 1679 if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS) 1680 return V_028C70_COLOR_INVALID; 1681 1682 int first_non_void = util_format_get_first_non_void_channel(format); 1683 1684 /* Reject SCALED formats because we don't implement them for CB. 
*/ 1685 if (first_non_void >= 0 && first_non_void <= 3 && 1686 (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED || 1687 desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_SIGNED) && 1688 !desc->channel[first_non_void].normalized && 1689 !desc->channel[first_non_void].pure_integer) 1690 return V_028C70_COLOR_INVALID; 1691 1692 switch (desc->nr_channels) { 1693 case 1: 1694 switch (desc->channel[0].size) { 1695 case 8: 1696 return V_028C70_COLOR_8; 1697 case 16: 1698 return V_028C70_COLOR_16; 1699 case 32: 1700 return V_028C70_COLOR_32; 1701 } 1702 break; 1703 case 2: 1704 if (desc->channel[0].size == desc->channel[1].size) { 1705 switch (desc->channel[0].size) { 1706 case 8: 1707 return V_028C70_COLOR_8_8; 1708 case 16: 1709 return V_028C70_COLOR_16_16; 1710 case 32: 1711 return V_028C70_COLOR_32_32; 1712 } 1713 } else if (HAS_SIZE(8, 24, 0, 0)) { 1714 return V_028C70_COLOR_24_8; 1715 } else if (HAS_SIZE(24, 8, 0, 0)) { 1716 return V_028C70_COLOR_8_24; 1717 } 1718 break; 1719 case 3: 1720 if (HAS_SIZE(5, 6, 5, 0)) { 1721 return V_028C70_COLOR_5_6_5; 1722 } else if (HAS_SIZE(32, 8, 24, 0)) { 1723 return V_028C70_COLOR_X24_8_32_FLOAT; 1724 } 1725 break; 1726 case 4: 1727 if (desc->channel[0].size == desc->channel[1].size && 1728 desc->channel[0].size == desc->channel[2].size && 1729 desc->channel[0].size == desc->channel[3].size) { 1730 switch (desc->channel[0].size) { 1731 case 4: 1732 return V_028C70_COLOR_4_4_4_4; 1733 case 8: 1734 return V_028C70_COLOR_8_8_8_8; 1735 case 16: 1736 return V_028C70_COLOR_16_16_16_16; 1737 case 32: 1738 return V_028C70_COLOR_32_32_32_32; 1739 } 1740 } else if (HAS_SIZE(5, 5, 5, 1)) { 1741 return V_028C70_COLOR_1_5_5_5; 1742 } else if (HAS_SIZE(1, 5, 5, 5)) { 1743 return V_028C70_COLOR_5_5_5_1; 1744 } else if (HAS_SIZE(10, 10, 10, 2)) { 1745 return V_028C70_COLOR_2_10_10_10; 1746 } else if (HAS_SIZE(2, 10, 10, 10)) { 1747 return V_028C70_COLOR_10_10_10_2; 1748 } 1749 break; 1750 } 1751 return V_028C70_COLOR_INVALID; 
1752} 1753 1754static uint32_t si_colorformat_endian_swap(uint32_t colorformat) 1755{ 1756 if (SI_BIG_ENDIAN) { 1757 switch (colorformat) { 1758 /* 8-bit buffers. */ 1759 case V_028C70_COLOR_8: 1760 return V_028C70_ENDIAN_NONE; 1761 1762 /* 16-bit buffers. */ 1763 case V_028C70_COLOR_5_6_5: 1764 case V_028C70_COLOR_1_5_5_5: 1765 case V_028C70_COLOR_4_4_4_4: 1766 case V_028C70_COLOR_16: 1767 case V_028C70_COLOR_8_8: 1768 return V_028C70_ENDIAN_8IN16; 1769 1770 /* 32-bit buffers. */ 1771 case V_028C70_COLOR_8_8_8_8: 1772 case V_028C70_COLOR_2_10_10_10: 1773 case V_028C70_COLOR_10_10_10_2: 1774 case V_028C70_COLOR_8_24: 1775 case V_028C70_COLOR_24_8: 1776 case V_028C70_COLOR_16_16: 1777 return V_028C70_ENDIAN_8IN32; 1778 1779 /* 64-bit buffers. */ 1780 case V_028C70_COLOR_16_16_16_16: 1781 return V_028C70_ENDIAN_8IN16; 1782 1783 case V_028C70_COLOR_32_32: 1784 return V_028C70_ENDIAN_8IN32; 1785 1786 /* 128-bit buffers. */ 1787 case V_028C70_COLOR_32_32_32_32: 1788 return V_028C70_ENDIAN_8IN32; 1789 default: 1790 return V_028C70_ENDIAN_NONE; /* Unsupported. 
/* Map a gallium depth/stencil format to the depth-buffer Z format
 * (V_028040_Z_*). Any format not listed yields V_028040_Z_INVALID.
 */
static uint32_t si_translate_dbformat(enum pipe_format format)
{
   switch (format) {
   case PIPE_FORMAT_Z16_UNORM:
      return V_028040_Z_16;
   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
   case PIPE_FORMAT_X8Z24_UNORM:
   case PIPE_FORMAT_Z24X8_UNORM:
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return V_028040_Z_24; /* deprecated on AMD GCN */
   case PIPE_FORMAT_Z32_FLOAT:
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      return V_028040_Z_32_FLOAT;
   default:
      return V_028040_Z_INVALID;
   }
}

/*
 * Texture translation
 */

/* Translate a gallium format into an image data format
 * (V_008F14_IMG_DATA_FORMAT_*) for chips up to GFX9 (asserted below).
 *
 * desc is the description of format and first_non_void the index of its
 * first non-void channel, both supplied by the caller.
 *
 * Returns ~0 when the format cannot be sampled.
 */
static uint32_t si_translate_texformat(struct pipe_screen *screen, enum pipe_format format,
                                       const struct util_format_description *desc,
                                       int first_non_void)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   bool uniform = true;
   int i;

   assert(sscreen->info.gfx_level <= GFX9);

   /* Colorspace (return non-RGB formats directly). */
   switch (desc->colorspace) {
   /* Depth stencil formats */
   case UTIL_FORMAT_COLORSPACE_ZS:
      switch (format) {
      case PIPE_FORMAT_Z16_UNORM:
         return V_008F14_IMG_DATA_FORMAT_16;
      case PIPE_FORMAT_X24S8_UINT:
      case PIPE_FORMAT_S8X24_UINT:
         /*
          * Implemented as an 8_8_8_8 data format to fix texture
          * gathers in stencil sampling. This affects at least
          * GL45-CTS.texture_cube_map_array.sampling on GFX8.
          */
         if (sscreen->info.gfx_level <= GFX8)
            return V_008F14_IMG_DATA_FORMAT_8_8_8_8;

         if (format == PIPE_FORMAT_X24S8_UINT)
            return V_008F14_IMG_DATA_FORMAT_8_24;
         else
            return V_008F14_IMG_DATA_FORMAT_24_8;
      case PIPE_FORMAT_Z24X8_UNORM:
      case PIPE_FORMAT_Z24_UNORM_S8_UINT:
         return V_008F14_IMG_DATA_FORMAT_8_24;
      case PIPE_FORMAT_X8Z24_UNORM:
      case PIPE_FORMAT_S8_UINT_Z24_UNORM:
         return V_008F14_IMG_DATA_FORMAT_24_8;
      case PIPE_FORMAT_S8_UINT:
         return V_008F14_IMG_DATA_FORMAT_8;
      case PIPE_FORMAT_Z32_FLOAT:
         return V_008F14_IMG_DATA_FORMAT_32;
      case PIPE_FORMAT_X32_S8X24_UINT:
      case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
         return V_008F14_IMG_DATA_FORMAT_X24_8_32;
      default:
         goto out_unknown;
      }

   case UTIL_FORMAT_COLORSPACE_YUV:
      goto out_unknown; /* TODO */

   case UTIL_FORMAT_COLORSPACE_SRGB:
      /* Only 1- and 4-channel sRGB formats are accepted. */
      if (desc->nr_channels != 4 && desc->nr_channels != 1)
         goto out_unknown;
      break;

   default:
      break;
   }

   /* Block-compressed and subsampled layouts are matched by exact format;
    * an unlisted format of a handled layout is rejected.
    */
   if (desc->layout == UTIL_FORMAT_LAYOUT_RGTC) {
      switch (format) {
      case PIPE_FORMAT_RGTC1_SNORM:
      case PIPE_FORMAT_LATC1_SNORM:
      case PIPE_FORMAT_RGTC1_UNORM:
      case PIPE_FORMAT_LATC1_UNORM:
         return V_008F14_IMG_DATA_FORMAT_BC4;
      case PIPE_FORMAT_RGTC2_SNORM:
      case PIPE_FORMAT_LATC2_SNORM:
      case PIPE_FORMAT_RGTC2_UNORM:
      case PIPE_FORMAT_LATC2_UNORM:
         return V_008F14_IMG_DATA_FORMAT_BC5;
      default:
         goto out_unknown;
      }
   }

   /* ETC is only translated on the chip families listed here. On other
    * families control falls through to the UTIL_FORMAT_LAYOUT_OTHER check
    * further down (NOTE(review): presumably ETC formats are rejected there;
    * confirm their layout handling).
    */
   if (desc->layout == UTIL_FORMAT_LAYOUT_ETC &&
       (sscreen->info.family == CHIP_STONEY || sscreen->info.family == CHIP_VEGA10 ||
        sscreen->info.family == CHIP_RAVEN || sscreen->info.family == CHIP_RAVEN2)) {
      switch (format) {
      case PIPE_FORMAT_ETC1_RGB8:
      case PIPE_FORMAT_ETC2_RGB8:
      case PIPE_FORMAT_ETC2_SRGB8:
         return V_008F14_IMG_DATA_FORMAT_ETC2_RGB;
      case PIPE_FORMAT_ETC2_RGB8A1:
      case PIPE_FORMAT_ETC2_SRGB8A1:
         return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA1;
      case PIPE_FORMAT_ETC2_RGBA8:
      case PIPE_FORMAT_ETC2_SRGBA8:
         return V_008F14_IMG_DATA_FORMAT_ETC2_RGBA;
      case PIPE_FORMAT_ETC2_R11_UNORM:
      case PIPE_FORMAT_ETC2_R11_SNORM:
         return V_008F14_IMG_DATA_FORMAT_ETC2_R;
      case PIPE_FORMAT_ETC2_RG11_UNORM:
      case PIPE_FORMAT_ETC2_RG11_SNORM:
         return V_008F14_IMG_DATA_FORMAT_ETC2_RG;
      default:
         goto out_unknown;
      }
   }

   if (desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
      switch (format) {
      case PIPE_FORMAT_BPTC_RGBA_UNORM:
      case PIPE_FORMAT_BPTC_SRGBA:
         return V_008F14_IMG_DATA_FORMAT_BC7;
      case PIPE_FORMAT_BPTC_RGB_FLOAT:
      case PIPE_FORMAT_BPTC_RGB_UFLOAT:
         return V_008F14_IMG_DATA_FORMAT_BC6;
      default:
         goto out_unknown;
      }
   }

   if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) {
      switch (format) {
      case PIPE_FORMAT_R8G8_B8G8_UNORM:
      case PIPE_FORMAT_G8R8_B8R8_UNORM:
         return V_008F14_IMG_DATA_FORMAT_GB_GR;
      case PIPE_FORMAT_G8R8_G8B8_UNORM:
      case PIPE_FORMAT_R8G8_R8B8_UNORM:
         return V_008F14_IMG_DATA_FORMAT_BG_RG;
      default:
         goto out_unknown;
      }
   }

   if (desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
      switch (format) {
      case PIPE_FORMAT_DXT1_RGB:
      case PIPE_FORMAT_DXT1_RGBA:
      case PIPE_FORMAT_DXT1_SRGB:
      case PIPE_FORMAT_DXT1_SRGBA:
         return V_008F14_IMG_DATA_FORMAT_BC1;
      case PIPE_FORMAT_DXT3_RGBA:
      case PIPE_FORMAT_DXT3_SRGBA:
         return V_008F14_IMG_DATA_FORMAT_BC2;
      case PIPE_FORMAT_DXT5_RGBA:
      case PIPE_FORMAT_DXT5_SRGBA:
         return V_008F14_IMG_DATA_FORMAT_BC3;
      default:
         goto out_unknown;
      }
   }

   /* The two packed float formats are matched by name before the generic
    * "OTHER layout" rejection below.
    */
   if (format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
      return V_008F14_IMG_DATA_FORMAT_5_9_9_9;
   } else if (format == PIPE_FORMAT_R11G11B10_FLOAT) {
      return V_008F14_IMG_DATA_FORMAT_10_11_11;
   }

   /* Other "OTHER" layouts are unsupported. */
   if (desc->layout == UTIL_FORMAT_LAYOUT_OTHER)
      goto out_unknown;

   /* hw cannot support mixed formats (except depth/stencil, since only
    * depth is read). */
   if (desc->is_mixed && desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
      goto out_unknown;

   /* Everything below indexes desc->channel[first_non_void]. */
   if (first_non_void < 0 || first_non_void > 3)
      goto out_unknown;

   /* Reject SCALED formats because we don't implement them for CB and do the same for texturing. */
   if ((desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED ||
        desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_SIGNED) &&
       !desc->channel[first_non_void].normalized &&
       !desc->channel[first_non_void].pure_integer)
      goto out_unknown;

   /* Reject unsupported 32_*NORM and FIXED formats. */
   if (desc->channel[first_non_void].size == 32 &&
       (desc->channel[first_non_void].normalized ||
        desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_FIXED))
      goto out_unknown;

   /* This format fails on Gfx8/Carrizo. */
   if (format == PIPE_FORMAT_A8R8_UNORM)
      goto out_unknown;

   /* See whether the components are of the same size. */
   for (i = 1; i < desc->nr_channels; i++) {
      uniform = uniform && desc->channel[0].size == desc->channel[i].size;
   }

   /* Non-uniform formats. */
   if (!uniform) {
      switch (desc->nr_channels) {
      case 3:
         if (desc->channel[0].size == 5 && desc->channel[1].size == 6 &&
             desc->channel[2].size == 5) {
            return V_008F14_IMG_DATA_FORMAT_5_6_5;
         }
         goto out_unknown;
      case 4:
         /* 5551 and 1555 UINT formats fail on Gfx8/Carrizo. */
         if (desc->channel[1].size == 5 &&
             desc->channel[2].size == 5 &&
             desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED &&
             desc->channel[first_non_void].pure_integer)
            goto out_unknown;

         if (desc->channel[0].size == 5 && desc->channel[1].size == 5 &&
             desc->channel[2].size == 5 && desc->channel[3].size == 1) {
            return V_008F14_IMG_DATA_FORMAT_1_5_5_5;
         }
         if (desc->channel[0].size == 1 && desc->channel[1].size == 5 &&
             desc->channel[2].size == 5 && desc->channel[3].size == 5) {
            return V_008F14_IMG_DATA_FORMAT_5_5_5_1;
         }
         if (desc->channel[0].size == 10 && desc->channel[1].size == 10 &&
             desc->channel[2].size == 10 && desc->channel[3].size == 2) {
            return V_008F14_IMG_DATA_FORMAT_2_10_10_10;
         }
         goto out_unknown;
      }
      goto out_unknown;
   }

   /* uniform formats */
   switch (desc->channel[first_non_void].size) {
   case 4:
      switch (desc->nr_channels) {
      case 4:
         /* 4444 UINT formats fail on Gfx8/Carrizo. */
         if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED &&
             desc->channel[first_non_void].pure_integer)
            goto out_unknown;

         return V_008F14_IMG_DATA_FORMAT_4_4_4_4;
      }
      break;
   case 8:
      switch (desc->nr_channels) {
      case 1:
         return V_008F14_IMG_DATA_FORMAT_8;
      case 2:
         return V_008F14_IMG_DATA_FORMAT_8_8;
      case 4:
         return V_008F14_IMG_DATA_FORMAT_8_8_8_8;
      }
      break;
   case 16:
      switch (desc->nr_channels) {
      case 1:
         return V_008F14_IMG_DATA_FORMAT_16;
      case 2:
         return V_008F14_IMG_DATA_FORMAT_16_16;
      case 4:
         return V_008F14_IMG_DATA_FORMAT_16_16_16_16;
      }
      break;
   case 32:
      switch (desc->nr_channels) {
      case 1:
         return V_008F14_IMG_DATA_FORMAT_32;
      case 2:
         return V_008F14_IMG_DATA_FORMAT_32_32;
#if 0 /* Not supported for render targets */
      case 3:
         return V_008F14_IMG_DATA_FORMAT_32_32_32;
#endif
      case 4:
         return V_008F14_IMG_DATA_FORMAT_32_32_32_32;
      }
      /* No match: falls through to out_unknown. */
   }

out_unknown:
   /* ~0 is the "unsupported" sentinel returned to callers. */
   return ~0;
}
*/ 2045 if (desc->channel[first_non_void].type == UTIL_FORMAT_TYPE_UNSIGNED && 2046 desc->channel[first_non_void].pure_integer) 2047 goto out_unknown; 2048 2049 return V_008F14_IMG_DATA_FORMAT_4_4_4_4; 2050 } 2051 break; 2052 case 8: 2053 switch (desc->nr_channels) { 2054 case 1: 2055 return V_008F14_IMG_DATA_FORMAT_8; 2056 case 2: 2057 return V_008F14_IMG_DATA_FORMAT_8_8; 2058 case 4: 2059 return V_008F14_IMG_DATA_FORMAT_8_8_8_8; 2060 } 2061 break; 2062 case 16: 2063 switch (desc->nr_channels) { 2064 case 1: 2065 return V_008F14_IMG_DATA_FORMAT_16; 2066 case 2: 2067 return V_008F14_IMG_DATA_FORMAT_16_16; 2068 case 4: 2069 return V_008F14_IMG_DATA_FORMAT_16_16_16_16; 2070 } 2071 break; 2072 case 32: 2073 switch (desc->nr_channels) { 2074 case 1: 2075 return V_008F14_IMG_DATA_FORMAT_32; 2076 case 2: 2077 return V_008F14_IMG_DATA_FORMAT_32_32; 2078#if 0 /* Not supported for render targets */ 2079 case 3: 2080 return V_008F14_IMG_DATA_FORMAT_32_32_32; 2081#endif 2082 case 4: 2083 return V_008F14_IMG_DATA_FORMAT_32_32_32_32; 2084 } 2085 } 2086 2087out_unknown: 2088 return ~0; 2089} 2090 2091static unsigned is_wrap_mode_legal(struct si_screen *screen, unsigned wrap) 2092{ 2093 if (!screen->info.has_3d_cube_border_color_mipmap) { 2094 switch (wrap) { 2095 case PIPE_TEX_WRAP_CLAMP: 2096 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 2097 case PIPE_TEX_WRAP_MIRROR_CLAMP: 2098 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 2099 return false; 2100 } 2101 } 2102 return true; 2103} 2104 2105static unsigned si_tex_wrap(unsigned wrap) 2106{ 2107 switch (wrap) { 2108 default: 2109 case PIPE_TEX_WRAP_REPEAT: 2110 return V_008F30_SQ_TEX_WRAP; 2111 case PIPE_TEX_WRAP_CLAMP: 2112 return V_008F30_SQ_TEX_CLAMP_HALF_BORDER; 2113 case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 2114 return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL; 2115 case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 2116 return V_008F30_SQ_TEX_CLAMP_BORDER; 2117 case PIPE_TEX_WRAP_MIRROR_REPEAT: 2118 return V_008F30_SQ_TEX_MIRROR; 2119 case PIPE_TEX_WRAP_MIRROR_CLAMP: 
2120 return V_008F30_SQ_TEX_MIRROR_ONCE_HALF_BORDER; 2121 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: 2122 return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL; 2123 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 2124 return V_008F30_SQ_TEX_MIRROR_ONCE_BORDER; 2125 } 2126} 2127 2128static unsigned si_tex_mipfilter(unsigned filter) 2129{ 2130 switch (filter) { 2131 case PIPE_TEX_MIPFILTER_NEAREST: 2132 return V_008F38_SQ_TEX_Z_FILTER_POINT; 2133 case PIPE_TEX_MIPFILTER_LINEAR: 2134 return V_008F38_SQ_TEX_Z_FILTER_LINEAR; 2135 default: 2136 case PIPE_TEX_MIPFILTER_NONE: 2137 return V_008F38_SQ_TEX_Z_FILTER_NONE; 2138 } 2139} 2140 2141static unsigned si_tex_compare(unsigned compare) 2142{ 2143 switch (compare) { 2144 default: 2145 case PIPE_FUNC_NEVER: 2146 return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER; 2147 case PIPE_FUNC_LESS: 2148 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS; 2149 case PIPE_FUNC_EQUAL: 2150 return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL; 2151 case PIPE_FUNC_LEQUAL: 2152 return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL; 2153 case PIPE_FUNC_GREATER: 2154 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER; 2155 case PIPE_FUNC_NOTEQUAL: 2156 return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL; 2157 case PIPE_FUNC_GEQUAL: 2158 return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL; 2159 case PIPE_FUNC_ALWAYS: 2160 return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS; 2161 } 2162} 2163 2164static unsigned si_tex_dim(struct si_screen *sscreen, struct si_texture *tex, unsigned view_target, 2165 unsigned nr_samples) 2166{ 2167 unsigned res_target = tex->buffer.b.b.target; 2168 2169 if (view_target == PIPE_TEXTURE_CUBE || view_target == PIPE_TEXTURE_CUBE_ARRAY) 2170 res_target = view_target; 2171 /* If interpreting cubemaps as something else, set 2D_ARRAY. */ 2172 else if (res_target == PIPE_TEXTURE_CUBE || res_target == PIPE_TEXTURE_CUBE_ARRAY) 2173 res_target = PIPE_TEXTURE_2D_ARRAY; 2174 2175 /* GFX9 allocates 1D textures as 2D. 
*/ 2176 if ((res_target == PIPE_TEXTURE_1D || res_target == PIPE_TEXTURE_1D_ARRAY) && 2177 sscreen->info.gfx_level == GFX9 && 2178 tex->surface.u.gfx9.resource_type == RADEON_RESOURCE_2D) { 2179 if (res_target == PIPE_TEXTURE_1D) 2180 res_target = PIPE_TEXTURE_2D; 2181 else 2182 res_target = PIPE_TEXTURE_2D_ARRAY; 2183 } 2184 2185 switch (res_target) { 2186 default: 2187 case PIPE_TEXTURE_1D: 2188 return V_008F1C_SQ_RSRC_IMG_1D; 2189 case PIPE_TEXTURE_1D_ARRAY: 2190 return V_008F1C_SQ_RSRC_IMG_1D_ARRAY; 2191 case PIPE_TEXTURE_2D: 2192 case PIPE_TEXTURE_RECT: 2193 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA : V_008F1C_SQ_RSRC_IMG_2D; 2194 case PIPE_TEXTURE_2D_ARRAY: 2195 return nr_samples > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 2196 case PIPE_TEXTURE_3D: 2197 return V_008F1C_SQ_RSRC_IMG_3D; 2198 case PIPE_TEXTURE_CUBE: 2199 case PIPE_TEXTURE_CUBE_ARRAY: 2200 return V_008F1C_SQ_RSRC_IMG_CUBE; 2201 } 2202} 2203 2204/* 2205 * Format support testing 2206 */ 2207 2208static bool si_is_sampler_format_supported(struct pipe_screen *screen, enum pipe_format format) 2209{ 2210 struct si_screen *sscreen = (struct si_screen *)screen; 2211 const struct util_format_description *desc = util_format_description(format); 2212 2213 /* Samplers don't support 64 bits per channel. 
*/ 2214 if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN && 2215 desc->channel[0].size == 64) 2216 return false; 2217 2218 if (sscreen->info.gfx_level >= GFX10) { 2219 const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&sscreen->info)[format]; 2220 if (!fmt->img_format || fmt->buffers_only) 2221 return false; 2222 return true; 2223 } 2224 2225 return si_translate_texformat(screen, format, desc, 2226 util_format_get_first_non_void_channel(format)) != ~0U; 2227} 2228 2229static uint32_t si_translate_buffer_dataformat(struct pipe_screen *screen, 2230 const struct util_format_description *desc, 2231 int first_non_void) 2232{ 2233 int i; 2234 2235 assert(((struct si_screen *)screen)->info.gfx_level <= GFX9); 2236 2237 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 2238 return V_008F0C_BUF_DATA_FORMAT_10_11_11; 2239 2240 assert(first_non_void >= 0); 2241 2242 if (desc->nr_channels == 4 && desc->channel[0].size == 10 && desc->channel[1].size == 10 && 2243 desc->channel[2].size == 10 && desc->channel[3].size == 2) 2244 return V_008F0C_BUF_DATA_FORMAT_2_10_10_10; 2245 2246 /* See whether the components are of the same size. 
*/ 2247 for (i = 0; i < desc->nr_channels; i++) { 2248 if (desc->channel[first_non_void].size != desc->channel[i].size) 2249 return V_008F0C_BUF_DATA_FORMAT_INVALID; 2250 } 2251 2252 switch (desc->channel[first_non_void].size) { 2253 case 8: 2254 switch (desc->nr_channels) { 2255 case 1: 2256 case 3: /* 3 loads */ 2257 return V_008F0C_BUF_DATA_FORMAT_8; 2258 case 2: 2259 return V_008F0C_BUF_DATA_FORMAT_8_8; 2260 case 4: 2261 return V_008F0C_BUF_DATA_FORMAT_8_8_8_8; 2262 } 2263 break; 2264 case 16: 2265 switch (desc->nr_channels) { 2266 case 1: 2267 case 3: /* 3 loads */ 2268 return V_008F0C_BUF_DATA_FORMAT_16; 2269 case 2: 2270 return V_008F0C_BUF_DATA_FORMAT_16_16; 2271 case 4: 2272 return V_008F0C_BUF_DATA_FORMAT_16_16_16_16; 2273 } 2274 break; 2275 case 32: 2276 switch (desc->nr_channels) { 2277 case 1: 2278 return V_008F0C_BUF_DATA_FORMAT_32; 2279 case 2: 2280 return V_008F0C_BUF_DATA_FORMAT_32_32; 2281 case 3: 2282 return V_008F0C_BUF_DATA_FORMAT_32_32_32; 2283 case 4: 2284 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2285 } 2286 break; 2287 case 64: 2288 /* Legacy double formats. 
*/ 2289 switch (desc->nr_channels) { 2290 case 1: /* 1 load */ 2291 return V_008F0C_BUF_DATA_FORMAT_32_32; 2292 case 2: /* 1 load */ 2293 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2294 case 3: /* 3 loads */ 2295 return V_008F0C_BUF_DATA_FORMAT_32_32; 2296 case 4: /* 2 loads */ 2297 return V_008F0C_BUF_DATA_FORMAT_32_32_32_32; 2298 } 2299 break; 2300 } 2301 2302 return V_008F0C_BUF_DATA_FORMAT_INVALID; 2303} 2304 2305static uint32_t si_translate_buffer_numformat(struct pipe_screen *screen, 2306 const struct util_format_description *desc, 2307 int first_non_void) 2308{ 2309 assert(((struct si_screen *)screen)->info.gfx_level <= GFX9); 2310 2311 if (desc->format == PIPE_FORMAT_R11G11B10_FLOAT) 2312 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2313 2314 assert(first_non_void >= 0); 2315 2316 switch (desc->channel[first_non_void].type) { 2317 case UTIL_FORMAT_TYPE_SIGNED: 2318 case UTIL_FORMAT_TYPE_FIXED: 2319 if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer) 2320 return V_008F0C_BUF_NUM_FORMAT_SINT; 2321 else if (desc->channel[first_non_void].normalized) 2322 return V_008F0C_BUF_NUM_FORMAT_SNORM; 2323 else 2324 return V_008F0C_BUF_NUM_FORMAT_SSCALED; 2325 break; 2326 case UTIL_FORMAT_TYPE_UNSIGNED: 2327 if (desc->channel[first_non_void].size >= 32 || desc->channel[first_non_void].pure_integer) 2328 return V_008F0C_BUF_NUM_FORMAT_UINT; 2329 else if (desc->channel[first_non_void].normalized) 2330 return V_008F0C_BUF_NUM_FORMAT_UNORM; 2331 else 2332 return V_008F0C_BUF_NUM_FORMAT_USCALED; 2333 break; 2334 case UTIL_FORMAT_TYPE_FLOAT: 2335 default: 2336 return V_008F0C_BUF_NUM_FORMAT_FLOAT; 2337 } 2338} 2339 2340static unsigned si_is_vertex_format_supported(struct pipe_screen *screen, enum pipe_format format, 2341 unsigned usage) 2342{ 2343 struct si_screen *sscreen = (struct si_screen *)screen; 2344 const struct util_format_description *desc; 2345 int first_non_void; 2346 unsigned data_format; 2347 2348 assert((usage & 
/* A color-buffer format is usable only when both the CB format and the
 * component swap can be translated.
 */
static bool si_is_colorbuffer_format_supported(enum amd_gfx_level gfx_level,
                                               enum pipe_format format)
{
   return si_translate_colorformat(gfx_level, format) != V_028C70_COLOR_INVALID &&
          si_translate_colorswap(gfx_level, format, false) != ~0U;
}

/* A depth/stencil format is usable when si_translate_dbformat() knows it. */
static bool si_is_zs_format_supported(enum pipe_format format)
{
   return si_translate_dbformat(format) != V_028040_Z_INVALID;
}

/* Decide whether a format/target/sample-count combination supports every
 * requested PIPE_BIND_* usage.
 *
 * The supported bits are accumulated in retval and the result is true only
 * when retval equals the (possibly extended) usage mask, i.e. when no
 * requested bit was left unsupported.
 */
static bool si_is_format_supported(struct pipe_screen *screen, enum pipe_format format,
                                   enum pipe_texture_target target, unsigned sample_count,
                                   unsigned storage_sample_count, unsigned usage)
{
   struct si_screen *sscreen = (struct si_screen *)screen;
   unsigned retval = 0;

   if (target >= PIPE_MAX_TEXTURE_TYPES) {
      PRINT_ERR("radeonsi: unsupported texture type %d\n", target);
      return false;
   }

   /* Require PIPE_BIND_SAMPLER_VIEW support when PIPE_BIND_RENDER_TARGET
    * is requested.
    */
   if (usage & PIPE_BIND_RENDER_TARGET)
      usage |= PIPE_BIND_SAMPLER_VIEW;

   /* 3D and cube targets are refused entirely on screens without
    * has_3d_cube_border_color_mipmap.
    */
   if ((target == PIPE_TEXTURE_3D || target == PIPE_TEXTURE_CUBE) &&
       !sscreen->info.has_3d_cube_border_color_mipmap)
      return false;

   /* Multi-planar formats are not supported. */
   if (util_format_get_num_planes(format) >= 2)
      return false;

   /* With 0 treated as 1, the storage sample count may not exceed the
    * sample count.
    */
   if (MAX2(1, sample_count) < MAX2(1, storage_sample_count))
      return false;

   if (sample_count > 1) {
      if (!screen->get_param(screen, PIPE_CAP_TEXTURE_MULTISAMPLE))
         return false;

      /* Only power-of-two sample counts are supported. */
      if (!util_is_power_of_two_or_zero(sample_count) ||
          !util_is_power_of_two_or_zero(storage_sample_count))
         return false;

      /* Chips with 1 RB don't increment occlusion queries at 16x MSAA sample rate,
       * so don't expose 16 samples there.
       */
      const unsigned max_eqaa_samples = util_bitcount(sscreen->info.enabled_rb_mask) <= 1 ? 8 : 16;
      const unsigned max_samples = 8;

      /* MSAA support without framebuffer attachments. */
      if (format == PIPE_FORMAT_NONE && sample_count <= max_eqaa_samples)
         return true;

      if (!sscreen->info.has_eqaa_surface_allocator || util_format_is_depth_or_stencil(format)) {
         /* Color without EQAA or depth/stencil. */
         if (sample_count > max_samples || sample_count != storage_sample_count)
            return false;
      } else {
         /* Color with EQAA. */
         if (sample_count > max_eqaa_samples || storage_sample_count > max_samples)
            return false;
      }
   }

   /* Sampler / image usage: buffers go through the vertex-format check,
    * textures through the sampler-format check.
    */
   if (usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) {
      if (target == PIPE_BUFFER) {
         retval |= si_is_vertex_format_supported(
            screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE));
      } else {
         if (si_is_sampler_format_supported(screen, format))
            retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE);
      }
   }

   /* Color-buffer style usage. BLENDABLE is granted separately because
    * pure-integer and depth/stencil formats never get it.
    */
   if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
                 PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) &&
       si_is_colorbuffer_format_supported(sscreen->info.gfx_level, format)) {
      retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT |
                         PIPE_BIND_SHARED);
      if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format))
         retval |= usage & PIPE_BIND_BLENDABLE;
   }

   if ((usage & PIPE_BIND_DEPTH_STENCIL) && si_is_zs_format_supported(format)) {
      retval |= PIPE_BIND_DEPTH_STENCIL;
   }

   if (usage & PIPE_BIND_VERTEX_BUFFER) {
      retval |= si_is_vertex_format_supported(screen, format, PIPE_BIND_VERTEX_BUFFER);
   }

   /* Index buffers only accept the three unsigned integer index formats. */
   if (usage & PIPE_BIND_INDEX_BUFFER) {
      if (format == PIPE_FORMAT_R8_UINT ||
          format == PIPE_FORMAT_R16_UINT ||
          format == PIPE_FORMAT_R32_UINT)
         retval |= PIPE_BIND_INDEX_BUFFER;
   }

   /* LINEAR is granted for uncompressed formats unless depth/stencil usage
    * was requested as well.
    */
   if ((usage & PIPE_BIND_LINEAR) && !util_format_is_compressed(format) &&
       !(usage & PIPE_BIND_DEPTH_STENCIL))
      retval |= PIPE_BIND_LINEAR;

   return retval == usage;
}
*/ 2449 if (sample_count > max_eqaa_samples || storage_sample_count > max_samples) 2450 return false; 2451 } 2452 } 2453 2454 if (usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)) { 2455 if (target == PIPE_BUFFER) { 2456 retval |= si_is_vertex_format_supported( 2457 screen, format, usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE)); 2458 } else { 2459 if (si_is_sampler_format_supported(screen, format)) 2460 retval |= usage & (PIPE_BIND_SAMPLER_VIEW | PIPE_BIND_SHADER_IMAGE); 2461 } 2462 } 2463 2464 if ((usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | 2465 PIPE_BIND_SHARED | PIPE_BIND_BLENDABLE)) && 2466 si_is_colorbuffer_format_supported(sscreen->info.gfx_level, format)) { 2467 retval |= usage & (PIPE_BIND_RENDER_TARGET | PIPE_BIND_DISPLAY_TARGET | PIPE_BIND_SCANOUT | 2468 PIPE_BIND_SHARED); 2469 if (!util_format_is_pure_integer(format) && !util_format_is_depth_or_stencil(format)) 2470 retval |= usage & PIPE_BIND_BLENDABLE; 2471 } 2472 2473 if ((usage & PIPE_BIND_DEPTH_STENCIL) && si_is_zs_format_supported(format)) { 2474 retval |= PIPE_BIND_DEPTH_STENCIL; 2475 } 2476 2477 if (usage & PIPE_BIND_VERTEX_BUFFER) { 2478 retval |= si_is_vertex_format_supported(screen, format, PIPE_BIND_VERTEX_BUFFER); 2479 } 2480 2481 if (usage & PIPE_BIND_INDEX_BUFFER) { 2482 if (format == PIPE_FORMAT_R8_UINT || 2483 format == PIPE_FORMAT_R16_UINT || 2484 format == PIPE_FORMAT_R32_UINT) 2485 retval |= PIPE_BIND_INDEX_BUFFER; 2486 } 2487 2488 if ((usage & PIPE_BIND_LINEAR) && !util_format_is_compressed(format) && 2489 !(usage & PIPE_BIND_DEPTH_STENCIL)) 2490 retval |= PIPE_BIND_LINEAR; 2491 2492 return retval == usage; 2493} 2494 2495/* 2496 * framebuffer handling 2497 */ 2498 2499static void si_choose_spi_color_formats(struct si_surface *surf, unsigned format, unsigned swap, 2500 unsigned ntype, bool is_depth) 2501{ 2502 struct ac_spi_color_formats formats = {}; 2503 2504 ac_choose_spi_color_formats(format, swap, ntype, is_depth, true, 
/* Compute the color-buffer state cached in a surface.
 *
 * Fills surf->cb_color_info, cb_color_attrib and cb_color_view on every
 * chip; cb_dcc_control on GFX8 and newer; cb_color_attrib2 on GFX9 and
 * newer; cb_color_attrib3 on GFX10 and newer. It also records the
 * int8/int10 flags and the SPI export formats, and finally sets
 * surf->color_initialized.
 */
static void si_initialize_color_surface(struct si_context *sctx, struct si_surface *surf)
{
   struct si_texture *tex = (struct si_texture *)surf->base.texture;
   unsigned color_info, color_attrib;
   unsigned format, swap, ntype, endian;
   const struct util_format_description *desc;
   int firstchan;
   unsigned blend_clamp = 0, blend_bypass = 0;

   /* Find the first non-void channel; firstchan ends up as 4 if there is
    * none.
    */
   desc = util_format_description(surf->base.format);
   for (firstchan = 0; firstchan < 4; firstchan++) {
      if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
         break;
      }
   }
   /* Derive the number type from that channel. sRGB takes precedence over
    * the signed/unsigned distinction, and UNORM is the fallback.
    */
   if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
      ntype = V_028C70_NUMBER_FLOAT;
   } else {
      ntype = V_028C70_NUMBER_UNORM;
      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
         ntype = V_028C70_NUMBER_SRGB;
      else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
         if (desc->channel[firstchan].pure_integer) {
            ntype = V_028C70_NUMBER_SINT;
         } else {
            assert(desc->channel[firstchan].normalized);
            ntype = V_028C70_NUMBER_SNORM;
         }
      } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         if (desc->channel[firstchan].pure_integer) {
            ntype = V_028C70_NUMBER_UINT;
         } else {
            assert(desc->channel[firstchan].normalized);
            ntype = V_028C70_NUMBER_UNORM;
         }
      }
   }

   /* An untranslatable format is reported and then asserted on; in builds
    * without assertions execution continues with the invalid format.
    */
   format = si_translate_colorformat(sctx->gfx_level, surf->base.format);
   if (format == V_028C70_COLOR_INVALID) {
      PRINT_ERR("Invalid CB format: %d, disabling CB.\n", surf->base.format);
   }
   assert(format != V_028C70_COLOR_INVALID);
   swap = si_translate_colorswap(sctx->gfx_level, surf->base.format, false);
   endian = si_colorformat_endian_swap(format);

   /* blend clamp should be set for all NORM/SRGB types */
   if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
       ntype == V_028C70_NUMBER_SRGB)
      blend_clamp = 1;

   /* set blend bypass according to docs if SINT/UINT or
      8/24 COLOR variants */
   if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
       format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
       format == V_028C70_COLOR_X24_8_32_FLOAT) {
      blend_clamp = 0;
      blend_bypass = 1;
   }

   /* Flag 8-bit and 10-bit integer color formats on the surface. */
   if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) {
      if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_8_8 ||
          format == V_028C70_COLOR_8_8_8_8)
         surf->color_is_int8 = true;
      else if (format == V_028C70_COLOR_10_10_10_2 || format == V_028C70_COLOR_2_10_10_10)
         surf->color_is_int10 = true;
   }

   /* ROUND_MODE is set for everything except the NORM/SRGB number types
    * and the 8_24 / 24_8 formats.
    */
   color_info =
      S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
      S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
      S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
                          ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
                          format != V_028C70_COLOR_24_8) |
      S_028C70_NUMBER_TYPE(ntype);

   /* GFX11 uses a different FORMAT field and no ENDIAN field is written. */
   if (sctx->gfx_level >= GFX11) {
      assert(!SI_BIG_ENDIAN);
      color_info |= S_028C70_FORMAT_GFX11(format);
   } else {
      color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian);
   }

   /* Intensity is implemented as Red, so treat it that way. */
   color_attrib = sctx->gfx_level >= GFX11 ?
      S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1 || util_format_is_intensity(surf->base.format)):
      S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1 || util_format_is_intensity(surf->base.format));

   /* MSAA: sample and fragment counts are stored as log2. */
   if (tex->buffer.b.b.nr_samples > 1) {
      unsigned log_samples = util_logbase2(tex->buffer.b.b.nr_samples);
      unsigned log_fragments = util_logbase2(tex->buffer.b.b.nr_storage_samples);

      if (sctx->gfx_level >= GFX11) {
         color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_fragments);
      } else {
         color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_fragments);

         /* A non-zero FMASK offset enables COMPRESSION in color_info. */
         if (tex->surface.fmask_offset) {
            color_info |= S_028C70_COMPRESSION(1);
            unsigned fmask_bankh = util_logbase2(tex->surface.u.legacy.color.fmask.bankh);

            if (sctx->gfx_level == GFX6) {
               /* due to a hw bug, FMASK_BANK_HEIGHT must be set on GFX6 too */
               color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
            }
         }
      }
   }

   /* amdvlk: [min-compressed-block-size] should be set to 32 for dGPU and
    * 64 for APU because all of our APUs to date use DIMMs which have
    * a request granularity size of 64B while all other chips have a
    * 32B request size */
   unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
   if (!sctx->screen->info.has_dedicated_vram)
      min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;

   /* DCC control: GFX10+ takes the block-size settings from the surface,
    * GFX8/GFX9 derive the max uncompressed block size from bpe for MSAA
    * surfaces. Chips older than GFX8 leave cb_dcc_control untouched.
    */
   if (sctx->gfx_level >= GFX10) {
      surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
                             S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) |
                             S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
                             S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.color.dcc.independent_64B_blocks);
      if (sctx->gfx_level >= GFX11)
         surf->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(tex->surface.u.gfx9.color.dcc.independent_128B_blocks);
      else
         surf->cb_dcc_control |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(tex->surface.u.gfx9.color.dcc.independent_128B_blocks);
   } else if (sctx->gfx_level >= GFX8) {
      unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;

      if (tex->buffer.b.b.nr_storage_samples > 1) {
         if (tex->surface.bpe == 1)
            max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
         else if (tex->surface.bpe == 2)
            max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
      }

      surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
                             S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
                             S_028C78_INDEPENDENT_64B_BLOCKS(1);
   }

   /* This must be set for fast clear to work without FMASK. */
   if (!tex->surface.fmask_size && sctx->gfx_level == GFX6) {
      unsigned bankh = util_logbase2(tex->surface.u.legacy.bankh);
      color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
   }

   /* GFX10 field has the same base shift as the GFX6 field */
   unsigned color_view = S_028C6C_SLICE_START(surf->base.u.tex.first_layer) |
                         S_028C6C_SLICE_MAX_GFX10(surf->base.u.tex.last_layer);
   /* Mip 0 extents are stored minus one. */
   unsigned mip0_width = surf->width0 - 1;
   unsigned mip0_height = surf->height0 - 1;
   unsigned mip0_depth = util_max_layer(&tex->buffer.b.b, 0);

   /* The mip level and mip0 depth / resource type live in different
    * places depending on the chip generation.
    */
   if (sctx->gfx_level >= GFX10) {
      color_view |= S_028C6C_MIP_LEVEL_GFX10(surf->base.u.tex.level);

      surf->cb_color_attrib3 = S_028EE0_MIP0_DEPTH(mip0_depth) |
                               S_028EE0_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type) |
                               S_028EE0_RESOURCE_LEVEL(sctx->gfx_level >= GFX11 ? 0 : 1);
   } else if (sctx->gfx_level == GFX9) {
      color_view |= S_028C6C_MIP_LEVEL_GFX9(surf->base.u.tex.level);
      color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
                      S_028C74_RESOURCE_TYPE(tex->surface.u.gfx9.resource_type);
   }

   if (sctx->gfx_level >= GFX9) {
      surf->cb_color_attrib2 = S_028C68_MIP0_WIDTH(mip0_width) |
                               S_028C68_MIP0_HEIGHT(mip0_height) |
                               S_028C68_MAX_MIP(tex->buffer.b.b.last_level);
   }

   /* Publish the accumulated values on the surface. */
   surf->cb_color_view = color_view;
   surf->cb_color_info = color_info;
   surf->cb_color_attrib = color_attrib;

   /* Determine pixel shader export format */
   si_choose_spi_color_formats(surf, format, swap, ntype, tex->is_depth);

   surf->color_initialized = true;
}
V_028044_STENCIL_8 : V_028044_STENCIL_INVALID; 2703 2704 assert(format != V_028040_Z_INVALID); 2705 if (format == V_028040_Z_INVALID) 2706 PRINT_ERR("Invalid DB format: %d, disabling DB.\n", tex->buffer.b.b.format); 2707 2708 surf->db_depth_view = S_028008_SLICE_START(surf->base.u.tex.first_layer) | 2709 S_028008_SLICE_MAX(surf->base.u.tex.last_layer); 2710 surf->db_htile_data_base = 0; 2711 surf->db_htile_surface = 0; 2712 2713 if (sctx->gfx_level >= GFX10) { 2714 surf->db_depth_view |= S_028008_SLICE_START_HI(surf->base.u.tex.first_layer >> 11) | 2715 S_028008_SLICE_MAX_HI(surf->base.u.tex.last_layer >> 11); 2716 } 2717 2718 if (sctx->gfx_level >= GFX9) { 2719 assert(tex->surface.u.gfx9.surf_offset == 0); 2720 surf->db_depth_base = tex->buffer.gpu_address >> 8; 2721 surf->db_stencil_base = (tex->buffer.gpu_address + tex->surface.u.gfx9.zs.stencil_offset) >> 8; 2722 z_info = S_028038_FORMAT(format) | 2723 S_028038_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)) | 2724 S_028038_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 2725 S_028038_MAXMIP(tex->buffer.b.b.last_level) | 2726 S_028040_ITERATE_256(sctx->gfx_level >= GFX11); 2727 s_info = S_02803C_FORMAT(stencil_format) | 2728 S_02803C_SW_MODE(tex->surface.u.gfx9.zs.stencil_swizzle_mode) | 2729 S_028044_ITERATE_256(sctx->gfx_level >= GFX11); 2730 2731 if (sctx->gfx_level == GFX9) { 2732 surf->db_z_info2 = S_028068_EPITCH(tex->surface.u.gfx9.epitch); 2733 surf->db_stencil_info2 = S_02806C_EPITCH(tex->surface.u.gfx9.zs.stencil_epitch); 2734 } 2735 surf->db_depth_view |= S_028008_MIPID(level); 2736 surf->db_depth_size = 2737 S_02801C_X_MAX(tex->buffer.b.b.width0 - 1) | S_02801C_Y_MAX(tex->buffer.b.b.height0 - 1); 2738 2739 if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) { 2740 z_info |= S_028038_TILE_SURFACE_ENABLE(1) | S_028038_ALLOW_EXPCLEAR(1); 2741 s_info |= S_02803C_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled); 2742 2743 if (tex->surface.has_stencil && !tex->htile_stencil_disabled) { 2744 /* Stencil 
buffer workaround ported from the GFX6-GFX8 code. 2745 * See that for explanation. 2746 */ 2747 s_info |= S_02803C_ALLOW_EXPCLEAR(tex->buffer.b.b.nr_samples <= 1); 2748 } 2749 2750 surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 2751 surf->db_htile_surface = 2752 S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1); 2753 if (sctx->gfx_level == GFX9) { 2754 surf->db_htile_surface |= S_028ABC_RB_ALIGNED(1); 2755 } 2756 } 2757 } else { 2758 /* GFX6-GFX8 */ 2759 struct legacy_surf_level *levelinfo = &tex->surface.u.legacy.level[level]; 2760 2761 assert(levelinfo->nblk_x % 8 == 0 && levelinfo->nblk_y % 8 == 0); 2762 2763 surf->db_depth_base = 2764 (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.level[level].offset_256B; 2765 surf->db_stencil_base = 2766 (tex->buffer.gpu_address >> 8) + tex->surface.u.legacy.zs.stencil_level[level].offset_256B; 2767 2768 z_info = 2769 S_028040_FORMAT(format) | S_028040_NUM_SAMPLES(util_logbase2(tex->buffer.b.b.nr_samples)); 2770 s_info = S_028044_FORMAT(stencil_format); 2771 surf->db_depth_info = 0; 2772 2773 if (sctx->gfx_level >= GFX7) { 2774 struct radeon_info *info = &sctx->screen->info; 2775 unsigned index = tex->surface.u.legacy.tiling_index[level]; 2776 unsigned stencil_index = tex->surface.u.legacy.zs.stencil_tiling_index[level]; 2777 unsigned macro_index = tex->surface.u.legacy.macro_tile_index; 2778 unsigned tile_mode = info->si_tile_mode_array[index]; 2779 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index]; 2780 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index]; 2781 2782 surf->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) | 2783 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) | 2784 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) | 2785 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) | 2786 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) | 2787 
S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode)); 2788 z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode)); 2789 s_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode)); 2790 } else { 2791 unsigned tile_mode_index = si_tile_mode_index(tex, level, false); 2792 z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index); 2793 tile_mode_index = si_tile_mode_index(tex, level, true); 2794 s_info |= S_028044_TILE_MODE_INDEX(tile_mode_index); 2795 } 2796 2797 surf->db_depth_size = S_028058_PITCH_TILE_MAX((levelinfo->nblk_x / 8) - 1) | 2798 S_028058_HEIGHT_TILE_MAX((levelinfo->nblk_y / 8) - 1); 2799 surf->db_depth_slice = 2800 S_02805C_SLICE_TILE_MAX((levelinfo->nblk_x * levelinfo->nblk_y) / 64 - 1); 2801 2802 if (si_htile_enabled(tex, level, PIPE_MASK_ZS)) { 2803 z_info |= S_028040_TILE_SURFACE_ENABLE(1) | S_028040_ALLOW_EXPCLEAR(1); 2804 s_info |= S_028044_TILE_STENCIL_DISABLE(tex->htile_stencil_disabled); 2805 2806 if (tex->surface.has_stencil) { 2807 /* Workaround: For a not yet understood reason, the 2808 * combination of MSAA, fast stencil clear and stencil 2809 * decompress messes with subsequent stencil buffer 2810 * uses. Problem was reproduced on Verde, Bonaire, 2811 * Tonga, and Carrizo. 2812 * 2813 * Disabling EXPCLEAR works around the problem. 2814 * 2815 * Check piglit's arb_texture_multisample-stencil-clear 2816 * test if you want to try changing this. 
2817 */ 2818 if (tex->buffer.b.b.nr_samples <= 1) 2819 s_info |= S_028044_ALLOW_EXPCLEAR(1); 2820 } 2821 2822 surf->db_htile_data_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 2823 surf->db_htile_surface = S_028ABC_FULL_CACHE(1); 2824 } 2825 } 2826 2827 surf->db_z_info = z_info; 2828 surf->db_stencil_info = s_info; 2829 2830 surf->depth_initialized = true; 2831} 2832 2833void si_set_sampler_depth_decompress_mask(struct si_context *sctx, struct si_texture *tex) 2834{ 2835 /* Check all sampler bindings in all shaders where depth textures are bound, and update 2836 * which samplers should be decompressed. 2837 */ 2838 u_foreach_bit(sh, sctx->shader_has_depth_tex) { 2839 u_foreach_bit(i, sctx->samplers[sh].has_depth_tex_mask) { 2840 if (sctx->samplers[sh].views[i]->texture == &tex->buffer.b.b) { 2841 sctx->samplers[sh].needs_depth_decompress_mask |= 1 << i; 2842 sctx->shader_needs_decompress_mask |= 1 << sh; 2843 } 2844 } 2845 } 2846} 2847 2848void si_update_fb_dirtiness_after_rendering(struct si_context *sctx) 2849{ 2850 if (sctx->decompression_enabled) 2851 return; 2852 2853 if (sctx->framebuffer.state.zsbuf) { 2854 struct pipe_surface *surf = sctx->framebuffer.state.zsbuf; 2855 struct si_texture *tex = (struct si_texture *)surf->texture; 2856 2857 tex->dirty_level_mask |= 1 << surf->u.tex.level; 2858 2859 if (tex->surface.has_stencil) 2860 tex->stencil_dirty_level_mask |= 1 << surf->u.tex.level; 2861 2862 si_set_sampler_depth_decompress_mask(sctx, tex); 2863 } 2864 2865 unsigned compressed_cb_mask = sctx->framebuffer.compressed_cb_mask; 2866 while (compressed_cb_mask) { 2867 unsigned i = u_bit_scan(&compressed_cb_mask); 2868 struct pipe_surface *surf = sctx->framebuffer.state.cbufs[i]; 2869 struct si_texture *tex = (struct si_texture *)surf->texture; 2870 2871 if (tex->surface.fmask_offset) { 2872 tex->dirty_level_mask |= 1 << surf->u.tex.level; 2873 tex->fmask_is_identity = false; 2874 } 2875 } 2876} 2877 2878static void 
si_dec_framebuffer_counters(const struct pipe_framebuffer_state *state) 2879{ 2880 for (int i = 0; i < state->nr_cbufs; ++i) { 2881 struct si_surface *surf = NULL; 2882 struct si_texture *tex; 2883 2884 if (!state->cbufs[i]) 2885 continue; 2886 surf = (struct si_surface *)state->cbufs[i]; 2887 tex = (struct si_texture *)surf->base.texture; 2888 2889 p_atomic_dec(&tex->framebuffers_bound); 2890 } 2891} 2892 2893void si_mark_display_dcc_dirty(struct si_context *sctx, struct si_texture *tex) 2894{ 2895 if (!tex->surface.display_dcc_offset || tex->displayable_dcc_dirty) 2896 return; 2897 2898 if (!(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) { 2899 struct hash_entry *entry = _mesa_hash_table_search(sctx->dirty_implicit_resources, tex); 2900 if (!entry) { 2901 struct pipe_resource *dummy = NULL; 2902 pipe_resource_reference(&dummy, &tex->buffer.b.b); 2903 _mesa_hash_table_insert(sctx->dirty_implicit_resources, tex, tex); 2904 } 2905 } 2906 tex->displayable_dcc_dirty = true; 2907} 2908 2909static void si_update_display_dcc_dirty(struct si_context *sctx) 2910{ 2911 const struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 2912 2913 for (unsigned i = 0; i < state->nr_cbufs; i++) { 2914 if (state->cbufs[i]) 2915 si_mark_display_dcc_dirty(sctx, (struct si_texture *)state->cbufs[i]->texture); 2916 } 2917} 2918 2919static void si_set_framebuffer_state(struct pipe_context *ctx, 2920 const struct pipe_framebuffer_state *state) 2921{ 2922 struct si_context *sctx = (struct si_context *)ctx; 2923 struct si_surface *surf = NULL; 2924 struct si_texture *tex; 2925 bool old_any_dst_linear = sctx->framebuffer.any_dst_linear; 2926 unsigned old_nr_samples = sctx->framebuffer.nr_samples; 2927 unsigned old_colorbuf_enabled_4bit = sctx->framebuffer.colorbuf_enabled_4bit; 2928 bool old_has_zsbuf = !!sctx->framebuffer.state.zsbuf; 2929 bool old_has_stencil = 2930 old_has_zsbuf && 2931 ((struct si_texture 
*)sctx->framebuffer.state.zsbuf->texture)->surface.has_stencil; 2932 int i; 2933 2934 /* Reject zero-sized framebuffers due to a hw bug on GFX6 that occurs 2935 * when PA_SU_HARDWARE_SCREEN_OFFSET != 0 and any_scissor.BR_X/Y <= 0. 2936 * We could implement the full workaround here, but it's a useless case. 2937 */ 2938 if ((!state->width || !state->height) && (state->nr_cbufs || state->zsbuf)) { 2939 unreachable("the framebuffer shouldn't have zero area"); 2940 return; 2941 } 2942 2943 si_update_fb_dirtiness_after_rendering(sctx); 2944 2945 /* Disable DCC if the formats are incompatible. */ 2946 for (i = 0; i < state->nr_cbufs; i++) { 2947 if (!state->cbufs[i]) 2948 continue; 2949 2950 surf = (struct si_surface *)state->cbufs[i]; 2951 tex = (struct si_texture *)surf->base.texture; 2952 2953 if (!surf->dcc_incompatible) 2954 continue; 2955 2956 if (vi_dcc_enabled(tex, surf->base.u.tex.level)) 2957 if (!si_texture_disable_dcc(sctx, tex)) 2958 si_decompress_dcc(sctx, tex); 2959 2960 surf->dcc_incompatible = false; 2961 } 2962 2963 /* Only flush TC when changing the framebuffer state, because 2964 * the only client not using TC that can change textures is 2965 * the framebuffer. 2966 * 2967 * Wait for compute shaders because of possible transitions: 2968 * - FB write -> shader read 2969 * - shader write -> FB read 2970 * 2971 * Wait for draws because of possible transitions: 2972 * - texture -> render (eg: glBlitFramebuffer(with src=dst) then glDraw*) 2973 * 2974 * DB caches are flushed on demand (using si_decompress_textures). 2975 * 2976 * When MSAA is enabled, CB and TC caches are flushed on demand 2977 * (after FMASK decompression). Shader write -> FB read transitions 2978 * cannot happen for MSAA textures, because MSAA shader images are 2979 * not supported. 2980 * 2981 * Only flush and wait for CB if there is actually a bound color buffer. 
2982 */ 2983 if (sctx->framebuffer.uncompressed_cb_mask) { 2984 si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 2985 sctx->framebuffer.CB_has_shader_readable_metadata, 2986 sctx->framebuffer.all_DCC_pipe_aligned); 2987 } 2988 2989 sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH | SI_CONTEXT_PS_PARTIAL_FLUSH; 2990 2991 /* u_blitter doesn't invoke depth decompression when it does multiple 2992 * blits in a row, but the only case when it matters for DB is when 2993 * doing generate_mipmap. So here we flush DB manually between 2994 * individual generate_mipmap blits. 2995 * Note that lower mipmap levels aren't compressed. 2996 */ 2997 if (sctx->generate_mipmap_for_depth) { 2998 si_make_DB_shader_coherent(sctx, 1, false, sctx->framebuffer.DB_has_shader_readable_metadata); 2999 } else if (sctx->gfx_level == GFX9) { 3000 /* It appears that DB metadata "leaks" in a sequence of: 3001 * - depth clear 3002 * - DCC decompress for shader image writes (with DB disabled) 3003 * - render with DEPTH_BEFORE_SHADER=1 3004 * Flushing DB metadata works around the problem. 3005 */ 3006 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB_META; 3007 } 3008 3009 /* Take the maximum of the old and new count. If the new count is lower, 3010 * dirtying is needed to disable the unbound colorbuffers. 
3011 */ 3012 sctx->framebuffer.dirty_cbufs |= 3013 (1 << MAX2(sctx->framebuffer.state.nr_cbufs, state->nr_cbufs)) - 1; 3014 sctx->framebuffer.dirty_zsbuf |= sctx->framebuffer.state.zsbuf != state->zsbuf; 3015 3016 si_dec_framebuffer_counters(&sctx->framebuffer.state); 3017 util_copy_framebuffer_state(&sctx->framebuffer.state, state); 3018 3019 sctx->framebuffer.colorbuf_enabled_4bit = 0; 3020 sctx->framebuffer.spi_shader_col_format = 0; 3021 sctx->framebuffer.spi_shader_col_format_alpha = 0; 3022 sctx->framebuffer.spi_shader_col_format_blend = 0; 3023 sctx->framebuffer.spi_shader_col_format_blend_alpha = 0; 3024 sctx->framebuffer.color_is_int8 = 0; 3025 sctx->framebuffer.color_is_int10 = 0; 3026 3027 sctx->framebuffer.compressed_cb_mask = 0; 3028 sctx->framebuffer.uncompressed_cb_mask = 0; 3029 sctx->framebuffer.nr_samples = util_framebuffer_get_num_samples(state); 3030 sctx->framebuffer.nr_color_samples = sctx->framebuffer.nr_samples; 3031 sctx->framebuffer.log_samples = util_logbase2(sctx->framebuffer.nr_samples); 3032 sctx->framebuffer.any_dst_linear = false; 3033 sctx->framebuffer.CB_has_shader_readable_metadata = false; 3034 sctx->framebuffer.DB_has_shader_readable_metadata = false; 3035 sctx->framebuffer.all_DCC_pipe_aligned = true; 3036 sctx->framebuffer.has_dcc_msaa = false; 3037 sctx->framebuffer.min_bytes_per_pixel = 0; 3038 3039 for (i = 0; i < state->nr_cbufs; i++) { 3040 if (!state->cbufs[i]) 3041 continue; 3042 3043 surf = (struct si_surface *)state->cbufs[i]; 3044 tex = (struct si_texture *)surf->base.texture; 3045 3046 if (!surf->color_initialized) { 3047 si_initialize_color_surface(sctx, surf); 3048 } 3049 3050 sctx->framebuffer.colorbuf_enabled_4bit |= 0xf << (i * 4); 3051 sctx->framebuffer.spi_shader_col_format |= surf->spi_shader_col_format << (i * 4); 3052 sctx->framebuffer.spi_shader_col_format_alpha |= surf->spi_shader_col_format_alpha << (i * 4); 3053 sctx->framebuffer.spi_shader_col_format_blend |= surf->spi_shader_col_format_blend << (i * 
4); 3054 sctx->framebuffer.spi_shader_col_format_blend_alpha |= surf->spi_shader_col_format_blend_alpha 3055 << (i * 4); 3056 3057 if (surf->color_is_int8) 3058 sctx->framebuffer.color_is_int8 |= 1 << i; 3059 if (surf->color_is_int10) 3060 sctx->framebuffer.color_is_int10 |= 1 << i; 3061 3062 if (tex->surface.fmask_offset) 3063 sctx->framebuffer.compressed_cb_mask |= 1 << i; 3064 else 3065 sctx->framebuffer.uncompressed_cb_mask |= 1 << i; 3066 3067 /* Don't update nr_color_samples for non-AA buffers. 3068 * (e.g. destination of MSAA resolve) 3069 */ 3070 if (tex->buffer.b.b.nr_samples >= 2 && 3071 tex->buffer.b.b.nr_storage_samples < tex->buffer.b.b.nr_samples) { 3072 sctx->framebuffer.nr_color_samples = 3073 MIN2(sctx->framebuffer.nr_color_samples, tex->buffer.b.b.nr_storage_samples); 3074 sctx->framebuffer.nr_color_samples = MAX2(1, sctx->framebuffer.nr_color_samples); 3075 } 3076 3077 if (tex->surface.is_linear) 3078 sctx->framebuffer.any_dst_linear = true; 3079 3080 if (vi_dcc_enabled(tex, surf->base.u.tex.level)) { 3081 sctx->framebuffer.CB_has_shader_readable_metadata = true; 3082 3083 if (sctx->gfx_level >= GFX9 && !tex->surface.u.gfx9.color.dcc.pipe_aligned) 3084 sctx->framebuffer.all_DCC_pipe_aligned = false; 3085 3086 if (tex->buffer.b.b.nr_storage_samples >= 2) 3087 sctx->framebuffer.has_dcc_msaa = true; 3088 } 3089 3090 si_context_add_resource_size(sctx, surf->base.texture); 3091 3092 p_atomic_inc(&tex->framebuffers_bound); 3093 3094 /* Update the minimum but don't keep 0. 
*/ 3095 if (!sctx->framebuffer.min_bytes_per_pixel || 3096 tex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) 3097 sctx->framebuffer.min_bytes_per_pixel = tex->surface.bpe; 3098 } 3099 3100 struct si_texture *zstex = NULL; 3101 3102 if (state->zsbuf) { 3103 surf = (struct si_surface *)state->zsbuf; 3104 zstex = (struct si_texture *)surf->base.texture; 3105 3106 if (!surf->depth_initialized) { 3107 si_init_depth_surface(sctx, surf); 3108 } 3109 3110 if (vi_tc_compat_htile_enabled(zstex, surf->base.u.tex.level, PIPE_MASK_ZS)) 3111 sctx->framebuffer.DB_has_shader_readable_metadata = true; 3112 3113 si_context_add_resource_size(sctx, surf->base.texture); 3114 3115 /* Update the minimum but don't keep 0. */ 3116 if (!sctx->framebuffer.min_bytes_per_pixel || 3117 zstex->surface.bpe < sctx->framebuffer.min_bytes_per_pixel) 3118 sctx->framebuffer.min_bytes_per_pixel = zstex->surface.bpe; 3119 } 3120 3121 si_update_ps_colorbuf0_slot(sctx); 3122 si_update_poly_offset_state(sctx); 3123 si_mark_atom_dirty(sctx, &sctx->atoms.s.cb_render_state); 3124 si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 3125 3126 /* NGG cull state uses the sample count. 
*/ 3127 if (sctx->screen->use_ngg_culling) 3128 si_mark_atom_dirty(sctx, &sctx->atoms.s.ngg_cull_state); 3129 3130 if (sctx->screen->dpbb_allowed) 3131 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 3132 3133 if (sctx->framebuffer.any_dst_linear != old_any_dst_linear) 3134 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3135 3136 if (sctx->screen->has_out_of_order_rast && 3137 (sctx->framebuffer.colorbuf_enabled_4bit != old_colorbuf_enabled_4bit || 3138 !!sctx->framebuffer.state.zsbuf != old_has_zsbuf || 3139 (zstex && zstex->surface.has_stencil != old_has_stencil))) 3140 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3141 3142 if (sctx->framebuffer.nr_samples != old_nr_samples) { 3143 struct pipe_constant_buffer constbuf = {0}; 3144 3145 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3146 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 3147 3148 if (!sctx->sample_pos_buffer) { 3149 sctx->sample_pos_buffer = pipe_buffer_create_with_data(&sctx->b, 0, PIPE_USAGE_DEFAULT, 3150 sizeof(sctx->sample_positions), 3151 &sctx->sample_positions); 3152 } 3153 constbuf.buffer = sctx->sample_pos_buffer; 3154 3155 /* Set sample locations as fragment shader constants. 
*/ 3156 switch (sctx->framebuffer.nr_samples) { 3157 case 1: 3158 constbuf.buffer_offset = 0; 3159 break; 3160 case 2: 3161 constbuf.buffer_offset = 3162 (ubyte *)sctx->sample_positions.x2 - (ubyte *)sctx->sample_positions.x1; 3163 break; 3164 case 4: 3165 constbuf.buffer_offset = 3166 (ubyte *)sctx->sample_positions.x4 - (ubyte *)sctx->sample_positions.x1; 3167 break; 3168 case 8: 3169 constbuf.buffer_offset = 3170 (ubyte *)sctx->sample_positions.x8 - (ubyte *)sctx->sample_positions.x1; 3171 break; 3172 case 16: 3173 constbuf.buffer_offset = 3174 (ubyte *)sctx->sample_positions.x16 - (ubyte *)sctx->sample_positions.x1; 3175 break; 3176 default: 3177 PRINT_ERR("Requested an invalid number of samples %i.\n", sctx->framebuffer.nr_samples); 3178 assert(0); 3179 } 3180 constbuf.buffer_size = sctx->framebuffer.nr_samples * 2 * 4; 3181 si_set_internal_const_buffer(sctx, SI_PS_CONST_SAMPLE_POSITIONS, &constbuf); 3182 3183 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_sample_locs); 3184 } 3185 3186 si_ps_key_update_framebuffer(sctx); 3187 si_ps_key_update_framebuffer_blend(sctx); 3188 si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 3189 si_update_ps_inputs_read_or_disabled(sctx); 3190 sctx->do_update_shaders = true; 3191 3192 if (!sctx->decompression_enabled) { 3193 /* Prevent textures decompression when the framebuffer state 3194 * changes come from the decompression passes themselves. 3195 */ 3196 sctx->need_check_render_feedback = true; 3197 } 3198} 3199 3200static void si_emit_framebuffer_state(struct si_context *sctx) 3201{ 3202 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 3203 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 3204 unsigned i, nr_cbufs = state->nr_cbufs; 3205 struct si_texture *tex = NULL; 3206 struct si_surface *cb = NULL; 3207 unsigned cb_color_info = 0; 3208 3209 radeon_begin(cs); 3210 3211 /* Colorbuffers. 
*/ 3212 for (i = 0; i < nr_cbufs; i++) { 3213 uint64_t cb_color_base, cb_color_fmask, cb_color_cmask, cb_dcc_base; 3214 unsigned cb_color_attrib; 3215 3216 if (!(sctx->framebuffer.dirty_cbufs & (1 << i))) 3217 continue; 3218 3219 cb = (struct si_surface *)state->cbufs[i]; 3220 if (!cb) { 3221 radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 3222 sctx->gfx_level >= GFX11 ? 3223 S_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID) : 3224 S_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID)); 3225 continue; 3226 } 3227 3228 tex = (struct si_texture *)cb->base.texture; 3229 radeon_add_to_buffer_list( 3230 sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC | 3231 (tex->buffer.b.b.nr_samples > 1 ? RADEON_PRIO_COLOR_BUFFER_MSAA : RADEON_PRIO_COLOR_BUFFER)); 3232 3233 if (tex->cmask_buffer && tex->cmask_buffer != &tex->buffer) { 3234 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, tex->cmask_buffer, 3235 RADEON_USAGE_READWRITE | RADEON_USAGE_NEEDS_IMPLICIT_SYNC | 3236 RADEON_PRIO_SEPARATE_META); 3237 } 3238 3239 /* Compute mutable surface parameters. */ 3240 cb_color_base = tex->buffer.gpu_address >> 8; 3241 cb_color_fmask = 0; 3242 cb_color_cmask = tex->cmask_base_address_reg; 3243 cb_dcc_base = 0; 3244 cb_color_info = cb->cb_color_info | tex->cb_color_info; 3245 cb_color_attrib = cb->cb_color_attrib; 3246 3247 if (tex->swap_rgb_to_bgr) { 3248 /* Swap R and B channels. 
*/ 3249 static unsigned rgb_to_bgr[4] = { 3250 [V_028C70_SWAP_STD] = V_028C70_SWAP_ALT, 3251 [V_028C70_SWAP_ALT] = V_028C70_SWAP_STD, 3252 [V_028C70_SWAP_STD_REV] = V_028C70_SWAP_ALT_REV, 3253 [V_028C70_SWAP_ALT_REV] = V_028C70_SWAP_STD_REV, 3254 }; 3255 unsigned swap = rgb_to_bgr[G_028C70_COMP_SWAP(cb_color_info)]; 3256 3257 cb_color_info &= C_028C70_COMP_SWAP; 3258 cb_color_info |= S_028C70_COMP_SWAP(swap); 3259 } 3260 3261 if (sctx->gfx_level < GFX11 && cb->base.u.tex.level > 0) 3262 cb_color_info &= C_028C70_FAST_CLEAR; 3263 3264 if (tex->surface.fmask_offset) { 3265 cb_color_fmask = (tex->buffer.gpu_address + tex->surface.fmask_offset) >> 8; 3266 cb_color_fmask |= tex->surface.fmask_tile_swizzle; 3267 } 3268 3269 /* Set up DCC. */ 3270 if (vi_dcc_enabled(tex, cb->base.u.tex.level)) { 3271 bool is_msaa_resolve_dst = state->cbufs[0] && state->cbufs[0]->texture->nr_samples > 1 && 3272 state->cbufs[1] == &cb->base && 3273 state->cbufs[1]->texture->nr_samples <= 1; 3274 3275 /* CB can't do MSAA resolve on gfx11. */ 3276 assert(!is_msaa_resolve_dst || sctx->gfx_level < GFX11); 3277 3278 if (!is_msaa_resolve_dst && sctx->gfx_level < GFX11) 3279 cb_color_info |= S_028C70_DCC_ENABLE(1); 3280 3281 cb_dcc_base = (tex->buffer.gpu_address + tex->surface.meta_offset) >> 8; 3282 3283 unsigned dcc_tile_swizzle = tex->surface.tile_swizzle; 3284 dcc_tile_swizzle &= ((1 << tex->surface.meta_alignment_log2) - 1) >> 8; 3285 cb_dcc_base |= dcc_tile_swizzle; 3286 } 3287 3288 if (sctx->gfx_level >= GFX11) { 3289 unsigned cb_color_attrib3, cb_fdcc_control; 3290 3291 /* Set mutable surface parameters. 
*/ 3292 cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3293 cb_color_base |= tex->surface.tile_swizzle; 3294 3295 cb_color_attrib3 = cb->cb_color_attrib3 | 3296 S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 3297 S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned); 3298 cb_fdcc_control = cb->cb_dcc_control | 3299 S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) | 3300 S_028C78_FDCC_ENABLE(vi_dcc_enabled(tex, cb->base.u.tex.level)); 3301 3302 radeon_set_context_reg_seq(R_028C6C_CB_COLOR0_VIEW + i * 0x3C, 4); 3303 radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3304 radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3305 radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3306 radeon_emit(cb_fdcc_control); /* CB_COLOR0_FDCC_CONTROL */ 3307 3308 radeon_set_context_reg(R_028C60_CB_COLOR0_BASE + i * 0x3C, cb_color_base); 3309 radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32); 3310 radeon_set_context_reg(R_028C94_CB_COLOR0_DCC_BASE + i * 0x3C, cb_dcc_base); 3311 radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32); 3312 radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2); 3313 radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3); 3314 } else if (sctx->gfx_level >= GFX10) { 3315 unsigned cb_color_attrib3; 3316 3317 /* Set mutable surface parameters. 
*/ 3318 cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3319 cb_color_base |= tex->surface.tile_swizzle; 3320 if (!tex->surface.fmask_offset) 3321 cb_color_fmask = cb_color_base; 3322 if (cb->base.u.tex.level > 0) 3323 cb_color_cmask = cb_color_base; 3324 3325 cb_color_attrib3 = cb->cb_color_attrib3 | 3326 S_028EE0_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 3327 S_028EE0_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 3328 S_028EE0_CMASK_PIPE_ALIGNED(1) | 3329 S_028EE0_DCC_PIPE_ALIGNED(tex->surface.u.gfx9.color.dcc.pipe_aligned); 3330 3331 radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 14); 3332 radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 3333 radeon_emit(0); /* hole */ 3334 radeon_emit(0); /* hole */ 3335 radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3336 radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3337 radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3338 radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3339 radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 3340 radeon_emit(0); /* hole */ 3341 radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 3342 radeon_emit(0); /* hole */ 3343 radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3344 radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3345 radeon_emit(cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 3346 3347 radeon_set_context_reg(R_028E40_CB_COLOR0_BASE_EXT + i * 4, cb_color_base >> 32); 3348 radeon_set_context_reg(R_028E60_CB_COLOR0_CMASK_BASE_EXT + i * 4, 3349 cb_color_cmask >> 32); 3350 radeon_set_context_reg(R_028E80_CB_COLOR0_FMASK_BASE_EXT + i * 4, 3351 cb_color_fmask >> 32); 3352 radeon_set_context_reg(R_028EA0_CB_COLOR0_DCC_BASE_EXT + i * 4, cb_dcc_base >> 32); 3353 radeon_set_context_reg(R_028EC0_CB_COLOR0_ATTRIB2 + i * 4, cb->cb_color_attrib2); 3354 radeon_set_context_reg(R_028EE0_CB_COLOR0_ATTRIB3 + i * 4, cb_color_attrib3); 3355 } else if (sctx->gfx_level == GFX9) { 3356 struct gfx9_surf_meta_flags 
meta = { 3357 .rb_aligned = 1, 3358 .pipe_aligned = 1, 3359 }; 3360 3361 if (!tex->is_depth && tex->surface.meta_offset) 3362 meta = tex->surface.u.gfx9.color.dcc; 3363 3364 /* Set mutable surface parameters. */ 3365 cb_color_base += tex->surface.u.gfx9.surf_offset >> 8; 3366 cb_color_base |= tex->surface.tile_swizzle; 3367 if (!tex->surface.fmask_offset) 3368 cb_color_fmask = cb_color_base; 3369 if (cb->base.u.tex.level > 0) 3370 cb_color_cmask = cb_color_base; 3371 cb_color_attrib |= S_028C74_COLOR_SW_MODE(tex->surface.u.gfx9.swizzle_mode) | 3372 S_028C74_FMASK_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 3373 S_028C74_RB_ALIGNED(meta.rb_aligned) | 3374 S_028C74_PIPE_ALIGNED(meta.pipe_aligned); 3375 3376 radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 15); 3377 radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 3378 radeon_emit(S_028C64_BASE_256B(cb_color_base >> 32)); /* CB_COLOR0_BASE_EXT */ 3379 radeon_emit(cb->cb_color_attrib2); /* CB_COLOR0_ATTRIB2 */ 3380 radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3381 radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3382 radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3383 radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3384 radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 3385 radeon_emit(S_028C80_BASE_256B(cb_color_cmask >> 32)); /* CB_COLOR0_CMASK_BASE_EXT */ 3386 radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 3387 radeon_emit(S_028C88_BASE_256B(cb_color_fmask >> 32)); /* CB_COLOR0_FMASK_BASE_EXT */ 3388 radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3389 radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3390 radeon_emit(cb_dcc_base); /* CB_COLOR0_DCC_BASE */ 3391 radeon_emit(S_028C98_BASE_256B(cb_dcc_base >> 32)); /* CB_COLOR0_DCC_BASE_EXT */ 3392 3393 radeon_set_context_reg(R_0287A0_CB_MRT0_EPITCH + i * 4, 3394 S_0287A0_EPITCH(tex->surface.u.gfx9.epitch)); 3395 } else { 3396 /* Compute mutable surface parameters 
(GFX6-GFX8). */ 3397 const struct legacy_surf_level *level_info = 3398 &tex->surface.u.legacy.level[cb->base.u.tex.level]; 3399 unsigned pitch_tile_max, slice_tile_max, tile_mode_index; 3400 unsigned cb_color_pitch, cb_color_slice, cb_color_fmask_slice; 3401 3402 cb_color_base += level_info->offset_256B; 3403 /* Only macrotiled modes can set tile swizzle. */ 3404 if (level_info->mode == RADEON_SURF_MODE_2D) 3405 cb_color_base |= tex->surface.tile_swizzle; 3406 3407 if (!tex->surface.fmask_offset) 3408 cb_color_fmask = cb_color_base; 3409 if (cb->base.u.tex.level > 0) 3410 cb_color_cmask = cb_color_base; 3411 if (cb_dcc_base) 3412 cb_dcc_base += tex->surface.u.legacy.color.dcc_level[cb->base.u.tex.level].dcc_offset >> 8; 3413 3414 pitch_tile_max = level_info->nblk_x / 8 - 1; 3415 slice_tile_max = level_info->nblk_x * level_info->nblk_y / 64 - 1; 3416 tile_mode_index = si_tile_mode_index(tex, cb->base.u.tex.level, false); 3417 3418 cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index); 3419 cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max); 3420 cb_color_slice = S_028C68_TILE_MAX(slice_tile_max); 3421 3422 if (tex->surface.fmask_offset) { 3423 if (sctx->gfx_level >= GFX7) 3424 cb_color_pitch |= 3425 S_028C64_FMASK_TILE_MAX(tex->surface.u.legacy.color.fmask.pitch_in_pixels / 8 - 1); 3426 cb_color_attrib |= 3427 S_028C74_FMASK_TILE_MODE_INDEX(tex->surface.u.legacy.color.fmask.tiling_index); 3428 cb_color_fmask_slice = S_028C88_TILE_MAX(tex->surface.u.legacy.color.fmask.slice_tile_max); 3429 } else { 3430 /* This must be set for fast clear to work without FMASK. */ 3431 if (sctx->gfx_level >= GFX7) 3432 cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max); 3433 cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index); 3434 cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max); 3435 } 3436 3437 radeon_set_context_reg_seq(R_028C60_CB_COLOR0_BASE + i * 0x3C, 3438 sctx->gfx_level >= GFX8 ? 
14 : 13); 3439 radeon_emit(cb_color_base); /* CB_COLOR0_BASE */ 3440 radeon_emit(cb_color_pitch); /* CB_COLOR0_PITCH */ 3441 radeon_emit(cb_color_slice); /* CB_COLOR0_SLICE */ 3442 radeon_emit(cb->cb_color_view); /* CB_COLOR0_VIEW */ 3443 radeon_emit(cb_color_info); /* CB_COLOR0_INFO */ 3444 radeon_emit(cb_color_attrib); /* CB_COLOR0_ATTRIB */ 3445 radeon_emit(cb->cb_dcc_control); /* CB_COLOR0_DCC_CONTROL */ 3446 radeon_emit(cb_color_cmask); /* CB_COLOR0_CMASK */ 3447 radeon_emit(tex->surface.u.legacy.color.cmask_slice_tile_max); /* CB_COLOR0_CMASK_SLICE */ 3448 radeon_emit(cb_color_fmask); /* CB_COLOR0_FMASK */ 3449 radeon_emit(cb_color_fmask_slice); /* CB_COLOR0_FMASK_SLICE */ 3450 radeon_emit(tex->color_clear_value[0]); /* CB_COLOR0_CLEAR_WORD0 */ 3451 radeon_emit(tex->color_clear_value[1]); /* CB_COLOR0_CLEAR_WORD1 */ 3452 3453 if (sctx->gfx_level >= GFX8) /* R_028C94_CB_COLOR0_DCC_BASE */ 3454 radeon_emit(cb_dcc_base); 3455 } 3456 } 3457 for (; i < 8; i++) 3458 if (sctx->framebuffer.dirty_cbufs & (1 << i)) 3459 radeon_set_context_reg(R_028C70_CB_COLOR0_INFO + i * 0x3C, 0); 3460 3461 /* ZS buffer. */ 3462 if (state->zsbuf && sctx->framebuffer.dirty_zsbuf) { 3463 struct si_surface *zb = (struct si_surface *)state->zsbuf; 3464 struct si_texture *tex = (struct si_texture *)zb->base.texture; 3465 unsigned db_z_info = zb->db_z_info; 3466 unsigned db_stencil_info = zb->db_stencil_info; 3467 unsigned db_htile_surface = zb->db_htile_surface; 3468 3469 radeon_add_to_buffer_list(sctx, &sctx->gfx_cs, &tex->buffer, RADEON_USAGE_READWRITE | 3470 (zb->base.texture->nr_samples > 1 ? RADEON_PRIO_DEPTH_BUFFER_MSAA 3471 : RADEON_PRIO_DEPTH_BUFFER)); 3472 bool tc_compat_htile = vi_tc_compat_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS); 3473 3474 /* Set fields dependent on tc_compatile_htile. 
*/ 3475 if (sctx->gfx_level >= GFX9 && tc_compat_htile) { 3476 unsigned max_zplanes = 4; 3477 3478 if (tex->db_render_format == PIPE_FORMAT_Z16_UNORM && tex->buffer.b.b.nr_samples > 1) 3479 max_zplanes = 2; 3480 3481 if (sctx->gfx_level >= GFX10) { 3482 bool iterate256 = tex->buffer.b.b.nr_samples >= 2; 3483 db_z_info |= S_028040_ITERATE_FLUSH(1) | 3484 S_028040_ITERATE_256(iterate256); 3485 db_stencil_info |= S_028044_ITERATE_FLUSH(!tex->htile_stencil_disabled) | 3486 S_028044_ITERATE_256(iterate256); 3487 3488 /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */ 3489 if (sctx->screen->info.has_two_planes_iterate256_bug && iterate256 && 3490 !tex->htile_stencil_disabled && tex->buffer.b.b.nr_samples == 4) { 3491 max_zplanes = 1; 3492 } 3493 } else { 3494 db_z_info |= S_028038_ITERATE_FLUSH(1); 3495 db_stencil_info |= S_02803C_ITERATE_FLUSH(1); 3496 } 3497 3498 db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes + 1); 3499 } 3500 3501 unsigned level = zb->base.u.tex.level; 3502 3503 if (sctx->gfx_level >= GFX10) { 3504 radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 3505 radeon_set_context_reg(R_02801C_DB_DEPTH_SIZE_XY, zb->db_depth_size); 3506 3507 if (sctx->gfx_level >= GFX11) { 3508 radeon_set_context_reg_seq(R_028040_DB_Z_INFO, 6); 3509 } else { 3510 radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 7); 3511 radeon_emit(S_02803C_RESOURCE_LEVEL(1)); /* DB_DEPTH_INFO */ 3512 } 3513 radeon_emit(db_z_info | /* DB_Z_INFO */ 3514 S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 3515 radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 3516 radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 3517 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3518 radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3519 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3520 3521 radeon_set_context_reg_seq(R_028068_DB_Z_READ_BASE_HI, 5); 3522 
radeon_emit(zb->db_depth_base >> 32); /* DB_Z_READ_BASE_HI */ 3523 radeon_emit(zb->db_stencil_base >> 32); /* DB_STENCIL_READ_BASE_HI */ 3524 radeon_emit(zb->db_depth_base >> 32); /* DB_Z_WRITE_BASE_HI */ 3525 radeon_emit(zb->db_stencil_base >> 32); /* DB_STENCIL_WRITE_BASE_HI */ 3526 radeon_emit(zb->db_htile_data_base >> 32); /* DB_HTILE_DATA_BASE_HI */ 3527 } else if (sctx->gfx_level == GFX9) { 3528 radeon_set_context_reg_seq(R_028014_DB_HTILE_DATA_BASE, 3); 3529 radeon_emit(zb->db_htile_data_base); /* DB_HTILE_DATA_BASE */ 3530 radeon_emit(S_028018_BASE_HI(zb->db_htile_data_base >> 32)); /* DB_HTILE_DATA_BASE_HI */ 3531 radeon_emit(zb->db_depth_size); /* DB_DEPTH_SIZE */ 3532 3533 radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 10); 3534 radeon_emit(db_z_info | /* DB_Z_INFO */ 3535 S_028038_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 3536 radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 3537 radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 3538 radeon_emit(S_028044_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_READ_BASE_HI */ 3539 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3540 radeon_emit(S_02804C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_READ_BASE_HI */ 3541 radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3542 radeon_emit(S_028054_BASE_HI(zb->db_depth_base >> 32)); /* DB_Z_WRITE_BASE_HI */ 3543 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3544 radeon_emit(S_02805C_BASE_HI(zb->db_stencil_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */ 3545 3546 radeon_set_context_reg_seq(R_028068_DB_Z_INFO2, 2); 3547 radeon_emit(zb->db_z_info2); /* DB_Z_INFO2 */ 3548 radeon_emit(zb->db_stencil_info2); /* DB_STENCIL_INFO2 */ 3549 } else { 3550 /* GFX6-GFX8 */ 3551 /* Set fields dependent on tc_compatile_htile. */ 3552 if (si_htile_enabled(tex, zb->base.u.tex.level, PIPE_MASK_ZS)) { 3553 if (tex->tc_compatible_htile) { 3554 db_htile_surface |= S_028ABC_TC_COMPATIBLE(1); 3555 3556 /* 0 = full compression. 
N = only compress up to N-1 Z planes. */ 3557 if (tex->buffer.b.b.nr_samples <= 1) 3558 db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(5); 3559 else if (tex->buffer.b.b.nr_samples <= 4) 3560 db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(3); 3561 else 3562 db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(2); 3563 } 3564 } 3565 3566 radeon_set_context_reg(R_028014_DB_HTILE_DATA_BASE, zb->db_htile_data_base); 3567 3568 radeon_set_context_reg_seq(R_02803C_DB_DEPTH_INFO, 9); 3569 radeon_emit(zb->db_depth_info | /* DB_DEPTH_INFO */ 3570 S_02803C_ADDR5_SWIZZLE_MASK(!tex->tc_compatible_htile)); 3571 radeon_emit(db_z_info | /* DB_Z_INFO */ 3572 S_028040_ZRANGE_PRECISION(tex->depth_clear_value[level] != 0)); 3573 radeon_emit(db_stencil_info); /* DB_STENCIL_INFO */ 3574 radeon_emit(zb->db_depth_base); /* DB_Z_READ_BASE */ 3575 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_READ_BASE */ 3576 radeon_emit(zb->db_depth_base); /* DB_Z_WRITE_BASE */ 3577 radeon_emit(zb->db_stencil_base); /* DB_STENCIL_WRITE_BASE */ 3578 radeon_emit(zb->db_depth_size); /* DB_DEPTH_SIZE */ 3579 radeon_emit(zb->db_depth_slice); /* DB_DEPTH_SLICE */ 3580 } 3581 3582 radeon_set_context_reg_seq(R_028028_DB_STENCIL_CLEAR, 2); 3583 radeon_emit(tex->stencil_clear_value[level]); /* R_028028_DB_STENCIL_CLEAR */ 3584 radeon_emit(fui(tex->depth_clear_value[level])); /* R_02802C_DB_DEPTH_CLEAR */ 3585 3586 radeon_set_context_reg(R_028008_DB_DEPTH_VIEW, zb->db_depth_view); 3587 radeon_set_context_reg(R_028ABC_DB_HTILE_SURFACE, db_htile_surface); 3588 } else if (sctx->framebuffer.dirty_zsbuf) { 3589 if (sctx->gfx_level == GFX9) 3590 radeon_set_context_reg_seq(R_028038_DB_Z_INFO, 2); 3591 else 3592 radeon_set_context_reg_seq(R_028040_DB_Z_INFO, 2); 3593 3594 /* Gfx11 only: DB_Z_INFO.NUM_SAMPLES should always match the framebuffer samples. 3595 * It affects VRS and occlusion queries if depth and stencil are not bound. 
3596 */ 3597 radeon_emit(S_028040_FORMAT(V_028040_Z_INVALID) | /* DB_Z_INFO */ 3598 S_028040_NUM_SAMPLES(sctx->gfx_level == GFX11 ? sctx->framebuffer.log_samples : 0)); 3599 radeon_emit(S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */ 3600 } 3601 3602 /* Framebuffer dimensions. */ 3603 /* PA_SC_WINDOW_SCISSOR_TL is set in si_init_cs_preamble_state */ 3604 radeon_set_context_reg(R_028208_PA_SC_WINDOW_SCISSOR_BR, 3605 S_028208_BR_X(state->width) | S_028208_BR_Y(state->height)); 3606 3607 if (sctx->screen->dpbb_allowed) { 3608 radeon_emit(PKT3(PKT3_EVENT_WRITE, 0, 0)); 3609 radeon_emit(EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 3610 } 3611 radeon_end(); 3612 3613 si_update_display_dcc_dirty(sctx); 3614 3615 sctx->framebuffer.dirty_cbufs = 0; 3616 sctx->framebuffer.dirty_zsbuf = false; 3617} 3618 3619static void si_emit_msaa_sample_locs(struct si_context *sctx) 3620{ 3621 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 3622 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 3623 unsigned nr_samples = sctx->framebuffer.nr_samples; 3624 bool has_msaa_sample_loc_bug = sctx->screen->info.has_msaa_sample_loc_bug; 3625 3626 /* Smoothing (only possible with nr_samples == 1) uses the same 3627 * sample locations as the MSAA it simulates. 3628 */ 3629 if (nr_samples <= 1 && sctx->smoothing_enabled) 3630 nr_samples = SI_NUM_SMOOTH_AA_SAMPLES; 3631 3632 /* On Polaris, the small primitive filter uses the sample locations 3633 * even when MSAA is off, so we need to make sure they're set to 0. 3634 * 3635 * GFX10 uses sample locations unconditionally, so they always need 3636 * to be set up. 
3637 */ 3638 if ((nr_samples >= 2 || has_msaa_sample_loc_bug || sctx->gfx_level >= GFX10) && 3639 nr_samples != sctx->sample_locs_num_samples) { 3640 sctx->sample_locs_num_samples = nr_samples; 3641 si_emit_sample_locations(cs, nr_samples); 3642 } 3643 3644 radeon_begin(cs); 3645 3646 if (sctx->family >= CHIP_POLARIS10) { 3647 unsigned small_prim_filter_cntl = 3648 S_028830_SMALL_PRIM_FILTER_ENABLE(1) | 3649 /* line bug */ 3650 S_028830_LINE_FILTER_DISABLE(sctx->family <= CHIP_POLARIS12); 3651 3652 /* For hardware with the sample location bug, the problem is that in order to use the small 3653 * primitive filter, we need to explicitly set the sample locations to 0. But the DB doesn't 3654 * properly process the change of sample locations without a flush, and so we can end up 3655 * with incorrect Z values. 3656 * 3657 * Instead of doing a flush, just disable the small primitive filter when MSAA is 3658 * force-disabled. 3659 * 3660 * The alternative of setting sample locations to 0 would require a DB flush to avoid 3661 * Z errors, see https://bugs.freedesktop.org/show_bug.cgi?id=96908 3662 */ 3663 if (has_msaa_sample_loc_bug && sctx->framebuffer.nr_samples > 1 && !rs->multisample_enable) 3664 small_prim_filter_cntl &= C_028830_SMALL_PRIM_FILTER_ENABLE; 3665 3666 radeon_opt_set_context_reg(sctx, R_028830_PA_SU_SMALL_PRIM_FILTER_CNTL, 3667 SI_TRACKED_PA_SU_SMALL_PRIM_FILTER_CNTL, small_prim_filter_cntl); 3668 } 3669 3670 /* The exclusion bits can be set to improve rasterization efficiency 3671 * if no sample lies on the pixel boundary (-8 sample offset). 
3672 */ 3673 bool exclusion = sctx->gfx_level >= GFX7 && (!rs->multisample_enable || nr_samples != 16); 3674 radeon_opt_set_context_reg( 3675 sctx, R_02882C_PA_SU_PRIM_FILTER_CNTL, SI_TRACKED_PA_SU_PRIM_FILTER_CNTL, 3676 S_02882C_XMAX_RIGHT_EXCLUSION(exclusion) | S_02882C_YMAX_BOTTOM_EXCLUSION(exclusion)); 3677 radeon_end(); 3678} 3679 3680static bool si_out_of_order_rasterization(struct si_context *sctx) 3681{ 3682 struct si_state_blend *blend = sctx->queued.named.blend; 3683 struct si_state_dsa *dsa = sctx->queued.named.dsa; 3684 3685 if (!sctx->screen->has_out_of_order_rast) 3686 return false; 3687 3688 unsigned colormask = sctx->framebuffer.colorbuf_enabled_4bit; 3689 3690 colormask &= blend->cb_target_enabled_4bit; 3691 3692 /* Conservative: No logic op. */ 3693 if (colormask && blend->logicop_enable) 3694 return false; 3695 3696 struct si_dsa_order_invariance dsa_order_invariant = {.zs = true, 3697 .pass_set = true}; 3698 3699 if (sctx->framebuffer.state.zsbuf) { 3700 struct si_texture *zstex = (struct si_texture *)sctx->framebuffer.state.zsbuf->texture; 3701 bool has_stencil = zstex->surface.has_stencil; 3702 dsa_order_invariant = dsa->order_invariance[has_stencil]; 3703 if (!dsa_order_invariant.zs) 3704 return false; 3705 3706 /* The set of PS invocations is always order invariant, 3707 * except when early Z/S tests are requested. */ 3708 if (sctx->shader.ps.cso && sctx->shader.ps.cso->info.base.writes_memory && 3709 sctx->shader.ps.cso->info.base.fs.early_fragment_tests && 3710 !dsa_order_invariant.pass_set) 3711 return false; 3712 3713 if (sctx->num_perfect_occlusion_queries != 0 && !dsa_order_invariant.pass_set) 3714 return false; 3715 } 3716 3717 if (!colormask) 3718 return true; 3719 3720 unsigned blendmask = colormask & blend->blend_enable_4bit; 3721 3722 if (blendmask) { 3723 /* Only commutative blending. 
*/ 3724 if (blendmask & ~blend->commutative_4bit) 3725 return false; 3726 3727 if (!dsa_order_invariant.pass_set) 3728 return false; 3729 } 3730 3731 if (colormask & ~blendmask) 3732 return false; 3733 3734 return true; 3735} 3736 3737static void si_emit_msaa_config(struct si_context *sctx) 3738{ 3739 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 3740 unsigned num_tile_pipes = sctx->screen->info.num_tile_pipes; 3741 /* 33% faster rendering to linear color buffers */ 3742 bool dst_is_linear = sctx->framebuffer.any_dst_linear; 3743 bool out_of_order_rast = si_out_of_order_rasterization(sctx); 3744 unsigned sc_mode_cntl_1 = 3745 S_028A4C_WALK_SIZE(dst_is_linear) | S_028A4C_WALK_FENCE_ENABLE(!dst_is_linear) | 3746 S_028A4C_WALK_FENCE_SIZE(num_tile_pipes == 2 ? 2 : 3) | 3747 S_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(out_of_order_rast) | 3748 S_028A4C_OUT_OF_ORDER_WATER_MARK(0x7) | 3749 /* always 1: */ 3750 S_028A4C_WALK_ALIGN8_PRIM_FITS_ST(1) | S_028A4C_SUPERTILE_WALK_ORDER_ENABLE(1) | 3751 S_028A4C_TILE_WALK_ORDER_ENABLE(1) | S_028A4C_MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE(1) | 3752 S_028A4C_FORCE_EOV_CNTDWN_ENABLE(1) | S_028A4C_FORCE_EOV_REZ_ENABLE(1); 3753 unsigned db_eqaa = S_028804_HIGH_QUALITY_INTERSECTIONS(1) | S_028804_INCOHERENT_EQAA_READS(1) | 3754 S_028804_INTERPOLATE_COMP_Z(sctx->gfx_level < GFX11) | 3755 S_028804_STATIC_ANCHOR_ASSOCIATIONS(1); 3756 unsigned coverage_samples, z_samples; 3757 struct si_state_rasterizer *rs = sctx->queued.named.rasterizer; 3758 3759 /* S: Coverage samples (up to 16x): 3760 * - Scan conversion samples (PA_SC_AA_CONFIG.MSAA_NUM_SAMPLES) 3761 * - CB FMASK samples (CB_COLORi_ATTRIB.NUM_SAMPLES) 3762 * 3763 * Z: Z/S samples (up to 8x, must be <= coverage samples and >= color samples): 3764 * - Value seen by DB (DB_Z_INFO.NUM_SAMPLES) 3765 * - Value seen by CB, must be correct even if Z/S is unbound (DB_EQAA.MAX_ANCHOR_SAMPLES) 3766 * # Missing samples are derived from Z planes if Z is compressed (up to 16x quality), or 3767 * # from the 
closest defined sample if Z is uncompressed (same quality as the number of 3768 * # Z samples). 3769 * 3770 * F: Color samples (up to 8x, must be <= coverage samples): 3771 * - CB color samples (CB_COLORi_ATTRIB.NUM_FRAGMENTS) 3772 * - PS iter samples (DB_EQAA.PS_ITER_SAMPLES) 3773 * 3774 * Can be anything between coverage and color samples: 3775 * - SampleMaskIn samples (PA_SC_AA_CONFIG.MSAA_EXPOSED_SAMPLES) 3776 * - SampleMaskOut samples (DB_EQAA.MASK_EXPORT_NUM_SAMPLES) 3777 * - Alpha-to-coverage samples (DB_EQAA.ALPHA_TO_MASK_NUM_SAMPLES) 3778 * - Occlusion query samples (DB_COUNT_CONTROL.SAMPLE_RATE) 3779 * # All are currently set the same as coverage samples. 3780 * 3781 * If color samples < coverage samples, FMASK has a higher bpp to store an "unknown" 3782 * flag for undefined color samples. A shader-based resolve must handle unknowns 3783 * or mask them out with AND. Unknowns can also be guessed from neighbors via 3784 * an edge-detect shader-based resolve, which is required to make "color samples = 1" 3785 * useful. The CB resolve always drops unknowns. 
3786 * 3787 * Sensible AA configurations: 3788 * EQAA 16s 8z 8f - might look the same as 16x MSAA if Z is compressed 3789 * EQAA 16s 8z 4f - might look the same as 16x MSAA if Z is compressed 3790 * EQAA 16s 4z 4f - might look the same as 16x MSAA if Z is compressed 3791 * EQAA 8s 8z 8f = 8x MSAA 3792 * EQAA 8s 8z 4f - might look the same as 8x MSAA 3793 * EQAA 8s 8z 2f - might look the same as 8x MSAA with low-density geometry 3794 * EQAA 8s 4z 4f - might look the same as 8x MSAA if Z is compressed 3795 * EQAA 8s 4z 2f - might look the same as 8x MSAA with low-density geometry if Z is compressed 3796 * EQAA 4s 4z 4f = 4x MSAA 3797 * EQAA 4s 4z 2f - might look the same as 4x MSAA with low-density geometry 3798 * EQAA 2s 2z 2f = 2x MSAA 3799 */ 3800 coverage_samples = si_get_num_coverage_samples(sctx); 3801 3802 /* The DX10 diamond test is not required by GL and decreases line rasterization 3803 * performance, so don't use it. 3804 */ 3805 unsigned sc_line_cntl = 0; 3806 unsigned sc_aa_config = 0; 3807 3808 if (coverage_samples > 1 && rs->multisample_enable) { 3809 /* distance from the pixel center, indexed by log2(nr_samples) */ 3810 static unsigned max_dist[] = { 3811 0, /* unused */ 3812 4, /* 2x MSAA */ 3813 6, /* 4x MSAA */ 3814 7, /* 8x MSAA */ 3815 8, /* 16x MSAA */ 3816 }; 3817 unsigned log_samples = util_logbase2(coverage_samples); 3818 3819 sc_line_cntl |= S_028BDC_EXPAND_LINE_WIDTH(1) | 3820 S_028BDC_PERPENDICULAR_ENDCAP_ENA(rs->perpendicular_end_caps) | 3821 S_028BDC_EXTRA_DX_DY_PRECISION(rs->perpendicular_end_caps && 3822 (sctx->family == CHIP_VEGA20 || 3823 sctx->gfx_level >= GFX10)); 3824 sc_aa_config = S_028BE0_MSAA_NUM_SAMPLES(log_samples) | 3825 S_028BE0_MAX_SAMPLE_DIST(max_dist[log_samples]) | 3826 S_028BE0_MSAA_EXPOSED_SAMPLES(log_samples) | 3827 S_028BE0_COVERED_CENTROID_IS_CENTER(sctx->gfx_level >= GFX10_3); 3828 } 3829 3830 if (sctx->framebuffer.nr_samples > 1) { 3831 if (sctx->framebuffer.state.zsbuf) { 3832 z_samples = 
sctx->framebuffer.state.zsbuf->texture->nr_samples; 3833 z_samples = MAX2(1, z_samples); 3834 } else { 3835 z_samples = coverage_samples; 3836 } 3837 unsigned log_samples = util_logbase2(coverage_samples); 3838 unsigned log_z_samples = util_logbase2(z_samples); 3839 unsigned ps_iter_samples = si_get_ps_iter_samples(sctx); 3840 unsigned log_ps_iter_samples = util_logbase2(ps_iter_samples); 3841 if (sctx->framebuffer.nr_samples > 1) { 3842 db_eqaa |= S_028804_MAX_ANCHOR_SAMPLES(log_z_samples) | 3843 S_028804_PS_ITER_SAMPLES(log_ps_iter_samples) | 3844 S_028804_MASK_EXPORT_NUM_SAMPLES(log_samples) | 3845 S_028804_ALPHA_TO_MASK_NUM_SAMPLES(log_samples); 3846 sc_mode_cntl_1 |= S_028A4C_PS_ITER_SAMPLE(ps_iter_samples > 1); 3847 } else if (sctx->smoothing_enabled) { 3848 db_eqaa |= S_028804_OVERRASTERIZATION_AMOUNT(log_samples); 3849 } 3850 } 3851 3852 radeon_begin(cs); 3853 3854 /* R_028BDC_PA_SC_LINE_CNTL, R_028BE0_PA_SC_AA_CONFIG */ 3855 radeon_opt_set_context_reg2(sctx, R_028BDC_PA_SC_LINE_CNTL, SI_TRACKED_PA_SC_LINE_CNTL, 3856 sc_line_cntl, sc_aa_config); 3857 /* R_028804_DB_EQAA */ 3858 radeon_opt_set_context_reg(sctx, R_028804_DB_EQAA, SI_TRACKED_DB_EQAA, db_eqaa); 3859 /* R_028A4C_PA_SC_MODE_CNTL_1 */ 3860 radeon_opt_set_context_reg(sctx, R_028A4C_PA_SC_MODE_CNTL_1, SI_TRACKED_PA_SC_MODE_CNTL_1, 3861 sc_mode_cntl_1); 3862 radeon_end_update_context_roll(sctx); 3863} 3864 3865void si_update_ps_iter_samples(struct si_context *sctx) 3866{ 3867 if (sctx->framebuffer.nr_samples > 1) 3868 si_mark_atom_dirty(sctx, &sctx->atoms.s.msaa_config); 3869 if (sctx->screen->dpbb_allowed) 3870 si_mark_atom_dirty(sctx, &sctx->atoms.s.dpbb_state); 3871} 3872 3873static void si_set_min_samples(struct pipe_context *ctx, unsigned min_samples) 3874{ 3875 struct si_context *sctx = (struct si_context *)ctx; 3876 3877 /* The hardware can only do sample shading with 2^n samples. 
*/ 3878 min_samples = util_next_power_of_two(min_samples); 3879 3880 if (sctx->ps_iter_samples == min_samples) 3881 return; 3882 3883 sctx->ps_iter_samples = min_samples; 3884 3885 si_ps_key_update_sample_shading(sctx); 3886 si_ps_key_update_framebuffer_rasterizer_sample_shading(sctx); 3887 sctx->do_update_shaders = true; 3888 3889 si_update_ps_iter_samples(sctx); 3890} 3891 3892/* 3893 * Samplers 3894 */ 3895 3896/** 3897 * Build the sampler view descriptor for a buffer texture. 3898 * @param state 256-bit descriptor; only the high 128 bits are filled in 3899 */ 3900void si_make_buffer_descriptor(struct si_screen *screen, struct si_resource *buf, 3901 enum pipe_format format, unsigned offset, unsigned num_elements, 3902 uint32_t *state) 3903{ 3904 const struct util_format_description *desc; 3905 unsigned stride; 3906 unsigned num_records; 3907 3908 desc = util_format_description(format); 3909 stride = desc->block.bits / 8; 3910 3911 num_records = num_elements; 3912 num_records = MIN2(num_records, (buf->b.b.width0 - offset) / stride); 3913 3914 /* The NUM_RECORDS field has a different meaning depending on the chip, 3915 * instruction type, STRIDE, and SWIZZLE_ENABLE. 3916 * 3917 * GFX6-7,10: 3918 * - If STRIDE == 0, it's in byte units. 3919 * - If STRIDE != 0, it's in units of STRIDE, used with inst.IDXEN. 3920 * 3921 * GFX8: 3922 * - For SMEM and STRIDE == 0, it's in byte units. 3923 * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3924 * - For VMEM and STRIDE == 0 or SWIZZLE_ENABLE == 0, it's in byte units. 3925 * - For VMEM and STRIDE != 0 and SWIZZLE_ENABLE == 1, it's in units of STRIDE. 3926 * NOTE: There is incompatibility between VMEM and SMEM opcodes due to SWIZZLE_- 3927 * ENABLE. The workaround is to set STRIDE = 0 if SWIZZLE_ENABLE == 0 when 3928 * using SMEM. This can be done in the shader by clearing STRIDE with s_and. 3929 * That way the same descriptor can be used by both SMEM and VMEM. 
3930 * 3931 * GFX9: 3932 * - For SMEM and STRIDE == 0, it's in byte units. 3933 * - For SMEM and STRIDE != 0, it's in units of STRIDE. 3934 * - For VMEM and inst.IDXEN == 0 or STRIDE == 0, it's in byte units. 3935 * - For VMEM and inst.IDXEN == 1 and STRIDE != 0, it's in units of STRIDE. 3936 */ 3937 if (screen->info.gfx_level == GFX8) 3938 num_records *= stride; 3939 3940 state[4] = 0; 3941 state[5] = S_008F04_STRIDE(stride); 3942 state[6] = num_records; 3943 state[7] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 3944 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 3945 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 3946 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])); 3947 3948 if (screen->info.gfx_level >= GFX10) { 3949 const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&screen->info)[format]; 3950 3951 /* OOB_SELECT chooses the out-of-bounds check: 3952 * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE) 3953 * - 1: index >= NUM_RECORDS 3954 * - 2: NUM_RECORDS == 0 3955 * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS 3956 * else: swizzle_address >= NUM_RECORDS 3957 */ 3958 state[7] |= S_008F0C_FORMAT(fmt->img_format) | 3959 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) | 3960 S_008F0C_RESOURCE_LEVEL(screen->info.gfx_level < GFX11); 3961 } else { 3962 int first_non_void; 3963 unsigned num_format, data_format; 3964 3965 first_non_void = util_format_get_first_non_void_channel(format); 3966 num_format = si_translate_buffer_numformat(&screen->b, desc, first_non_void); 3967 data_format = si_translate_buffer_dataformat(&screen->b, desc, first_non_void); 3968 3969 state[7] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); 3970 } 3971} 3972 3973static unsigned gfx9_border_color_swizzle(const unsigned char swizzle[4]) 3974{ 3975 unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3976 3977 if (swizzle[3] == PIPE_SWIZZLE_X) { 3978 /* For the pre-defined border color values (white, opaque 3979 
* black, transparent black), the only thing that matters is 3980 * that the alpha channel winds up in the correct place 3981 * (because the RGB channels are all the same) so either of 3982 * these enumerations will work. 3983 */ 3984 if (swizzle[2] == PIPE_SWIZZLE_Y) 3985 bc_swizzle = V_008F20_BC_SWIZZLE_WZYX; 3986 else 3987 bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ; 3988 } else if (swizzle[0] == PIPE_SWIZZLE_X) { 3989 if (swizzle[1] == PIPE_SWIZZLE_Y) 3990 bc_swizzle = V_008F20_BC_SWIZZLE_XYZW; 3991 else 3992 bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ; 3993 } else if (swizzle[1] == PIPE_SWIZZLE_X) { 3994 bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ; 3995 } else if (swizzle[2] == PIPE_SWIZZLE_X) { 3996 bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW; 3997 } 3998 3999 return bc_swizzle; 4000} 4001 4002/** 4003 * Build the sampler view descriptor for a texture. 4004 */ 4005static void gfx10_make_texture_descriptor( 4006 struct si_screen *screen, struct si_texture *tex, bool sampler, enum pipe_texture_target target, 4007 enum pipe_format pipe_format, const unsigned char state_swizzle[4], unsigned first_level, 4008 unsigned last_level, unsigned first_layer, unsigned last_layer, unsigned width, unsigned height, 4009 unsigned depth, uint32_t *state, uint32_t *fmask_state) 4010{ 4011 struct pipe_resource *res = &tex->buffer.b.b; 4012 const struct util_format_description *desc; 4013 unsigned img_format; 4014 unsigned char swizzle[4]; 4015 unsigned type; 4016 uint64_t va; 4017 4018 desc = util_format_description(pipe_format); 4019 img_format = ac_get_gfx10_format_table(&screen->info)[pipe_format].img_format; 4020 4021 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 4022 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 4023 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 4024 const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 4025 bool is_stencil = false; 4026 4027 switch (pipe_format) { 4028 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4029 case PIPE_FORMAT_X32_S8X24_UINT: 4030 case 
PIPE_FORMAT_X8Z24_UNORM: 4031 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 4032 is_stencil = true; 4033 break; 4034 case PIPE_FORMAT_X24S8_UINT: 4035 /* 4036 * X24S8 is implemented as an 8_8_8_8 data format, to 4037 * fix texture gathers. This affects at least 4038 * GL45-CTS.texture_cube_map_array.sampling on GFX8. 4039 */ 4040 util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 4041 is_stencil = true; 4042 break; 4043 default: 4044 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 4045 is_stencil = pipe_format == PIPE_FORMAT_S8_UINT; 4046 } 4047 4048 if (tex->upgraded_depth && !is_stencil) { 4049 if (screen->info.gfx_level >= GFX11) { 4050 assert(img_format == V_008F0C_GFX11_FORMAT_32_FLOAT); 4051 img_format = V_008F0C_GFX11_FORMAT_32_FLOAT_CLAMP; 4052 } else { 4053 assert(img_format == V_008F0C_GFX10_FORMAT_32_FLOAT); 4054 img_format = V_008F0C_GFX10_FORMAT_32_FLOAT_CLAMP; 4055 } 4056 } 4057 } else { 4058 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 4059 } 4060 4061 if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY)) { 4062 /* For the purpose of shader images, treat cube maps as 2D 4063 * arrays. 
4064 */ 4065 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 4066 } else { 4067 type = si_tex_dim(screen, tex, target, res->nr_samples); 4068 } 4069 4070 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 4071 height = 1; 4072 depth = res->array_size; 4073 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 4074 if (sampler || res->target != PIPE_TEXTURE_3D) 4075 depth = res->array_size; 4076 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 4077 depth = res->array_size / 6; 4078 4079 state[0] = 0; 4080 state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1); 4081 state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 4082 S_00A008_RESOURCE_LEVEL(screen->info.gfx_level < GFX11); 4083 4084 state[3] = 4085 S_00A00C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 4086 S_00A00C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 4087 S_00A00C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 4088 S_00A00C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 4089 S_00A00C_BASE_LEVEL(res->nr_samples > 1 ? 0 : first_level) | 4090 S_00A00C_LAST_LEVEL(res->nr_samples > 1 ? util_logbase2(res->nr_samples) : last_level) | 4091 S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc->swizzle)) | S_00A00C_TYPE(type); 4092 /* Depth is the the last accessible layer on gfx9+. The hw doesn't need 4093 * to know the total number of layers. 4094 */ 4095 state[4] = 4096 S_00A010_DEPTH((type == V_008F1C_SQ_RSRC_IMG_3D && sampler) ? depth - 1 : last_layer) | 4097 S_00A010_BASE_ARRAY(first_layer); 4098 state[5] = S_00A014_ARRAY_PITCH(!!(type == V_008F1C_SQ_RSRC_IMG_3D && !sampler)) | 4099 S_00A014_PERF_MOD(4); 4100 4101 unsigned max_mip = res->nr_samples > 1 ? 
util_logbase2(res->nr_samples) : 4102 tex->buffer.b.b.last_level; 4103 4104 if (screen->info.gfx_level >= GFX11) { 4105 state[1] |= S_00A004_MAX_MIP(max_mip); 4106 } else { 4107 state[5] |= S_00A014_MAX_MIP(max_mip); 4108 } 4109 state[6] = 0; 4110 state[7] = 0; 4111 4112 if (vi_dcc_enabled(tex, first_level)) { 4113 state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) | 4114 S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.color.dcc.max_compressed_block_size) | 4115 S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); 4116 } 4117 4118 /* Initialize the sampler view for FMASK. */ 4119 if (tex->surface.fmask_offset) { 4120 uint32_t format; 4121 4122 va = tex->buffer.gpu_address + tex->surface.fmask_offset; 4123 4124#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) 4125 switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 4126 case FMASK(2, 1): 4127 format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F1; 4128 break; 4129 case FMASK(2, 2): 4130 format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2; 4131 break; 4132 case FMASK(4, 1): 4133 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F1; 4134 break; 4135 case FMASK(4, 2): 4136 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F2; 4137 break; 4138 case FMASK(4, 4): 4139 format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4; 4140 break; 4141 case FMASK(8, 1): 4142 format = V_008F0C_GFX10_FORMAT_FMASK8_S8_F1; 4143 break; 4144 case FMASK(8, 2): 4145 format = V_008F0C_GFX10_FORMAT_FMASK16_S8_F2; 4146 break; 4147 case FMASK(8, 4): 4148 format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F4; 4149 break; 4150 case FMASK(8, 8): 4151 format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8; 4152 break; 4153 case FMASK(16, 1): 4154 format = V_008F0C_GFX10_FORMAT_FMASK16_S16_F1; 4155 break; 4156 case FMASK(16, 2): 4157 format = V_008F0C_GFX10_FORMAT_FMASK32_S16_F2; 4158 break; 4159 case FMASK(16, 4): 4160 format = V_008F0C_GFX10_FORMAT_FMASK64_S16_F4; 4161 break; 4162 case FMASK(16, 8): 4163 format = 
V_008F0C_GFX10_FORMAT_FMASK64_S16_F8; 4164 break; 4165 default: 4166 unreachable("invalid nr_samples"); 4167 } 4168#undef FMASK 4169 fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle; 4170 fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) | 4171 S_00A004_WIDTH_LO(width - 1); 4172 fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) | 4173 S_00A008_RESOURCE_LEVEL(1); 4174 fmask_state[3] = 4175 S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 4176 S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 4177 S_00A00C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode) | 4178 S_00A00C_TYPE(si_tex_dim(screen, tex, target, 0)); 4179 fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer); 4180 fmask_state[5] = 0; 4181 fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1); 4182 fmask_state[7] = 0; 4183 } 4184} 4185 4186/** 4187 * Build the sampler view descriptor for a texture (SI-GFX9). 4188 */ 4189static void si_make_texture_descriptor(struct si_screen *screen, struct si_texture *tex, 4190 bool sampler, enum pipe_texture_target target, 4191 enum pipe_format pipe_format, 4192 const unsigned char state_swizzle[4], unsigned first_level, 4193 unsigned last_level, unsigned first_layer, 4194 unsigned last_layer, unsigned width, unsigned height, 4195 unsigned depth, uint32_t *state, uint32_t *fmask_state) 4196{ 4197 struct pipe_resource *res = &tex->buffer.b.b; 4198 const struct util_format_description *desc; 4199 unsigned char swizzle[4]; 4200 int first_non_void; 4201 unsigned num_format, data_format, type, num_samples; 4202 uint64_t va; 4203 4204 desc = util_format_description(pipe_format); 4205 4206 num_samples = desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS ? 
MAX2(1, res->nr_samples) 4207 : MAX2(1, res->nr_storage_samples); 4208 4209 if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) { 4210 const unsigned char swizzle_xxxx[4] = {0, 0, 0, 0}; 4211 const unsigned char swizzle_yyyy[4] = {1, 1, 1, 1}; 4212 const unsigned char swizzle_wwww[4] = {3, 3, 3, 3}; 4213 4214 switch (pipe_format) { 4215 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4216 case PIPE_FORMAT_X32_S8X24_UINT: 4217 case PIPE_FORMAT_X8Z24_UNORM: 4218 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 4219 break; 4220 case PIPE_FORMAT_X24S8_UINT: 4221 /* 4222 * X24S8 is implemented as an 8_8_8_8 data format, to 4223 * fix texture gathers. This affects at least 4224 * GL45-CTS.texture_cube_map_array.sampling on GFX8. 4225 */ 4226 if (screen->info.gfx_level <= GFX8) 4227 util_format_compose_swizzles(swizzle_wwww, state_swizzle, swizzle); 4228 else 4229 util_format_compose_swizzles(swizzle_yyyy, state_swizzle, swizzle); 4230 break; 4231 default: 4232 util_format_compose_swizzles(swizzle_xxxx, state_swizzle, swizzle); 4233 } 4234 } else { 4235 util_format_compose_swizzles(desc->swizzle, state_swizzle, swizzle); 4236 } 4237 4238 first_non_void = util_format_get_first_non_void_channel(pipe_format); 4239 4240 switch (pipe_format) { 4241 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4242 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4243 break; 4244 default: 4245 if (first_non_void < 0) { 4246 if (util_format_is_compressed(pipe_format)) { 4247 switch (pipe_format) { 4248 case PIPE_FORMAT_DXT1_SRGB: 4249 case PIPE_FORMAT_DXT1_SRGBA: 4250 case PIPE_FORMAT_DXT3_SRGBA: 4251 case PIPE_FORMAT_DXT5_SRGBA: 4252 case PIPE_FORMAT_BPTC_SRGBA: 4253 case PIPE_FORMAT_ETC2_SRGB8: 4254 case PIPE_FORMAT_ETC2_SRGB8A1: 4255 case PIPE_FORMAT_ETC2_SRGBA8: 4256 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 4257 break; 4258 case PIPE_FORMAT_RGTC1_SNORM: 4259 case PIPE_FORMAT_LATC1_SNORM: 4260 case PIPE_FORMAT_RGTC2_SNORM: 4261 case PIPE_FORMAT_LATC2_SNORM: 4262 case PIPE_FORMAT_ETC2_R11_SNORM: 
4263 case PIPE_FORMAT_ETC2_RG11_SNORM: 4264 /* implies float, so use SNORM/UNORM to determine 4265 whether data is signed or not */ 4266 case PIPE_FORMAT_BPTC_RGB_FLOAT: 4267 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 4268 break; 4269 default: 4270 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4271 break; 4272 } 4273 } else if (desc->layout == UTIL_FORMAT_LAYOUT_SUBSAMPLED) { 4274 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4275 } else { 4276 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 4277 } 4278 } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) { 4279 num_format = V_008F14_IMG_NUM_FORMAT_SRGB; 4280 } else { 4281 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4282 4283 switch (desc->channel[first_non_void].type) { 4284 case UTIL_FORMAT_TYPE_FLOAT: 4285 num_format = V_008F14_IMG_NUM_FORMAT_FLOAT; 4286 break; 4287 case UTIL_FORMAT_TYPE_SIGNED: 4288 if (desc->channel[first_non_void].normalized) 4289 num_format = V_008F14_IMG_NUM_FORMAT_SNORM; 4290 else if (desc->channel[first_non_void].pure_integer) 4291 num_format = V_008F14_IMG_NUM_FORMAT_SINT; 4292 else 4293 num_format = V_008F14_IMG_NUM_FORMAT_SSCALED; 4294 break; 4295 case UTIL_FORMAT_TYPE_UNSIGNED: 4296 if (desc->channel[first_non_void].normalized) 4297 num_format = V_008F14_IMG_NUM_FORMAT_UNORM; 4298 else if (desc->channel[first_non_void].pure_integer) 4299 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 4300 else 4301 num_format = V_008F14_IMG_NUM_FORMAT_USCALED; 4302 } 4303 } 4304 } 4305 4306 data_format = si_translate_texformat(&screen->b, pipe_format, desc, first_non_void); 4307 if (data_format == ~0) { 4308 data_format = 0; 4309 } 4310 4311 /* S8 with Z32 HTILE needs a special format. 
*/ 4312 if (screen->info.gfx_level == GFX9 && pipe_format == PIPE_FORMAT_S8_UINT) 4313 data_format = V_008F14_IMG_DATA_FORMAT_S8_32; 4314 4315 if (!sampler && (res->target == PIPE_TEXTURE_CUBE || res->target == PIPE_TEXTURE_CUBE_ARRAY || 4316 (screen->info.gfx_level <= GFX8 && res->target == PIPE_TEXTURE_3D))) { 4317 /* For the purpose of shader images, treat cube maps and 3D 4318 * textures as 2D arrays. For 3D textures, the address 4319 * calculations for mipmaps are different, so we rely on the 4320 * caller to effectively disable mipmaps. 4321 */ 4322 type = V_008F1C_SQ_RSRC_IMG_2D_ARRAY; 4323 4324 assert(res->target != PIPE_TEXTURE_3D || (first_level == 0 && last_level == 0)); 4325 } else { 4326 type = si_tex_dim(screen, tex, target, num_samples); 4327 } 4328 4329 if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) { 4330 height = 1; 4331 depth = res->array_size; 4332 } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 4333 if (sampler || res->target != PIPE_TEXTURE_3D) 4334 depth = res->array_size; 4335 } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE) 4336 depth = res->array_size / 6; 4337 4338 state[0] = 0; 4339 state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format)); 4340 state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4)); 4341 state[3] = (S_008F1C_DST_SEL_X(si_map_swizzle(swizzle[0])) | 4342 S_008F1C_DST_SEL_Y(si_map_swizzle(swizzle[1])) | 4343 S_008F1C_DST_SEL_Z(si_map_swizzle(swizzle[2])) | 4344 S_008F1C_DST_SEL_W(si_map_swizzle(swizzle[3])) | 4345 S_008F1C_BASE_LEVEL(num_samples > 1 ? 0 : first_level) | 4346 S_008F1C_LAST_LEVEL(num_samples > 1 ? 
util_logbase2(num_samples) : last_level) | 4347 S_008F1C_TYPE(type)); 4348 state[4] = 0; 4349 state[5] = S_008F24_BASE_ARRAY(first_layer); 4350 state[6] = 0; 4351 state[7] = 0; 4352 4353 if (screen->info.gfx_level == GFX9) { 4354 unsigned bc_swizzle = gfx9_border_color_swizzle(desc->swizzle); 4355 4356 /* Depth is the the last accessible layer on Gfx9. 4357 * The hw doesn't need to know the total number of layers. 4358 */ 4359 if (type == V_008F1C_SQ_RSRC_IMG_3D) 4360 state[4] |= S_008F20_DEPTH(depth - 1); 4361 else 4362 state[4] |= S_008F20_DEPTH(last_layer); 4363 4364 state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle); 4365 state[5] |= S_008F24_MAX_MIP(num_samples > 1 ? util_logbase2(num_samples) 4366 : tex->buffer.b.b.last_level); 4367 } else { 4368 state[3] |= S_008F1C_POW2_PAD(res->last_level > 0); 4369 state[4] |= S_008F20_DEPTH(depth - 1); 4370 state[5] |= S_008F24_LAST_ARRAY(last_layer); 4371 } 4372 4373 if (vi_dcc_enabled(tex, first_level)) { 4374 state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format)); 4375 } else { 4376 /* The last dword is unused by hw. The shader uses it to clear 4377 * bits in the first dword of sampler state. 4378 */ 4379 if (screen->info.gfx_level <= GFX7 && res->nr_samples <= 1) { 4380 if (first_level == last_level) 4381 state[7] = C_008F30_MAX_ANISO_RATIO; 4382 else 4383 state[7] = 0xffffffff; 4384 } 4385 } 4386 4387 /* Initialize the sampler view for FMASK. 
*/ 4388 if (tex->surface.fmask_offset) { 4389 uint32_t data_format, num_format; 4390 4391 va = tex->buffer.gpu_address + tex->surface.fmask_offset; 4392 4393#define FMASK(s, f) (((unsigned)(MAX2(1, s)) * 16) + (MAX2(1, f))) 4394 if (screen->info.gfx_level == GFX9) { 4395 data_format = V_008F14_IMG_DATA_FORMAT_FMASK; 4396 switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 4397 case FMASK(2, 1): 4398 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_1; 4399 break; 4400 case FMASK(2, 2): 4401 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2; 4402 break; 4403 case FMASK(4, 1): 4404 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_1; 4405 break; 4406 case FMASK(4, 2): 4407 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_2; 4408 break; 4409 case FMASK(4, 4): 4410 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4; 4411 break; 4412 case FMASK(8, 1): 4413 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_8_1; 4414 break; 4415 case FMASK(8, 2): 4416 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_8_2; 4417 break; 4418 case FMASK(8, 4): 4419 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_4; 4420 break; 4421 case FMASK(8, 8): 4422 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8; 4423 break; 4424 case FMASK(16, 1): 4425 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_16_16_1; 4426 break; 4427 case FMASK(16, 2): 4428 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_16_2; 4429 break; 4430 case FMASK(16, 4): 4431 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_4; 4432 break; 4433 case FMASK(16, 8): 4434 num_format = V_008F14_IMG_NUM_FORMAT_FMASK_64_16_8; 4435 break; 4436 default: 4437 unreachable("invalid nr_samples"); 4438 } 4439 } else { 4440 switch (FMASK(res->nr_samples, res->nr_storage_samples)) { 4441 case FMASK(2, 1): 4442 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F1; 4443 break; 4444 case FMASK(2, 2): 4445 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2; 4446 break; 4447 case FMASK(4, 1): 4448 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F1; 4449 break; 4450 case 
FMASK(4, 2): 4451 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F2; 4452 break; 4453 case FMASK(4, 4): 4454 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4; 4455 break; 4456 case FMASK(8, 1): 4457 data_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S8_F1; 4458 break; 4459 case FMASK(8, 2): 4460 data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S8_F2; 4461 break; 4462 case FMASK(8, 4): 4463 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F4; 4464 break; 4465 case FMASK(8, 8): 4466 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8; 4467 break; 4468 case FMASK(16, 1): 4469 data_format = V_008F14_IMG_DATA_FORMAT_FMASK16_S16_F1; 4470 break; 4471 case FMASK(16, 2): 4472 data_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S16_F2; 4473 break; 4474 case FMASK(16, 4): 4475 data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F4; 4476 break; 4477 case FMASK(16, 8): 4478 data_format = V_008F14_IMG_DATA_FORMAT_FMASK64_S16_F8; 4479 break; 4480 default: 4481 unreachable("invalid nr_samples"); 4482 } 4483 num_format = V_008F14_IMG_NUM_FORMAT_UINT; 4484 } 4485#undef FMASK 4486 4487 fmask_state[0] = (va >> 8) | tex->surface.fmask_tile_swizzle; 4488 fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(data_format) | 4489 S_008F14_NUM_FORMAT(num_format); 4490 fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1); 4491 fmask_state[3] = 4492 S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) | 4493 S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) | 4494 S_008F1C_TYPE(si_tex_dim(screen, tex, target, 0)); 4495 fmask_state[4] = 0; 4496 fmask_state[5] = S_008F24_BASE_ARRAY(first_layer); 4497 fmask_state[6] = 0; 4498 fmask_state[7] = 0; 4499 4500 if (screen->info.gfx_level == GFX9) { 4501 fmask_state[3] |= S_008F1C_SW_MODE(tex->surface.u.gfx9.color.fmask_swizzle_mode); 4502 fmask_state[4] |= 4503 S_008F20_DEPTH(last_layer) | S_008F20_PITCH(tex->surface.u.gfx9.color.fmask_epitch); 4504 fmask_state[5] 
|= S_008F24_META_PIPE_ALIGNED(1) | 4505 S_008F24_META_RB_ALIGNED(1); 4506 } else { 4507 fmask_state[3] |= S_008F1C_TILING_INDEX(tex->surface.u.legacy.color.fmask.tiling_index); 4508 fmask_state[4] |= S_008F20_DEPTH(depth - 1) | 4509 S_008F20_PITCH(tex->surface.u.legacy.color.fmask.pitch_in_pixels - 1); 4510 fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer); 4511 } 4512 } 4513} 4514 4515/** 4516 * Create a sampler view. 4517 * 4518 * @param ctx context 4519 * @param texture texture 4520 * @param state sampler view template 4521 */ 4522static struct pipe_sampler_view *si_create_sampler_view(struct pipe_context *ctx, 4523 struct pipe_resource *texture, 4524 const struct pipe_sampler_view *state) 4525{ 4526 struct si_context *sctx = (struct si_context *)ctx; 4527 struct si_sampler_view *view = CALLOC_STRUCT_CL(si_sampler_view); 4528 struct si_texture *tex = (struct si_texture *)texture; 4529 unsigned char state_swizzle[4]; 4530 unsigned last_layer = state->u.tex.last_layer; 4531 enum pipe_format pipe_format; 4532 const struct legacy_surf_level *surflevel; 4533 4534 if (!view) 4535 return NULL; 4536 4537 /* initialize base object */ 4538 view->base = *state; 4539 view->base.texture = NULL; 4540 view->base.reference.count = 1; 4541 view->base.context = ctx; 4542 4543 assert(texture); 4544 pipe_resource_reference(&view->base.texture, texture); 4545 4546 if (state->format == PIPE_FORMAT_X24S8_UINT || state->format == PIPE_FORMAT_S8X24_UINT || 4547 state->format == PIPE_FORMAT_X32_S8X24_UINT || state->format == PIPE_FORMAT_S8_UINT) 4548 view->is_stencil_sampler = true; 4549 4550 /* Buffer resource. 
*/ 4551 if (texture->target == PIPE_BUFFER) { 4552 uint32_t elements = si_clamp_texture_texel_count(sctx->screen->max_texel_buffer_elements, 4553 state->format, state->u.buf.size); 4554 4555 si_make_buffer_descriptor(sctx->screen, si_resource(texture), state->format, 4556 state->u.buf.offset, elements, view->state); 4557 return &view->base; 4558 } 4559 4560 state_swizzle[0] = state->swizzle_r; 4561 state_swizzle[1] = state->swizzle_g; 4562 state_swizzle[2] = state->swizzle_b; 4563 state_swizzle[3] = state->swizzle_a; 4564 4565 /* This is not needed if gallium frontends set last_layer correctly. */ 4566 if (state->target == PIPE_TEXTURE_1D || state->target == PIPE_TEXTURE_2D || 4567 state->target == PIPE_TEXTURE_RECT || state->target == PIPE_TEXTURE_CUBE) 4568 last_layer = state->u.tex.first_layer; 4569 4570 /* Texturing with separate depth and stencil. */ 4571 pipe_format = state->format; 4572 4573 /* Depth/stencil texturing sometimes needs separate texture. */ 4574 if (tex->is_depth && !si_can_sample_zs(tex, view->is_stencil_sampler)) { 4575 if (!tex->flushed_depth_texture && !si_init_flushed_depth_texture(ctx, texture)) { 4576 pipe_resource_reference(&view->base.texture, NULL); 4577 FREE(view); 4578 return NULL; 4579 } 4580 4581 assert(tex->flushed_depth_texture); 4582 4583 /* Override format for the case where the flushed texture 4584 * contains only Z or only S. 
4585 */ 4586 if (tex->flushed_depth_texture->buffer.b.b.format != tex->buffer.b.b.format) 4587 pipe_format = tex->flushed_depth_texture->buffer.b.b.format; 4588 4589 tex = tex->flushed_depth_texture; 4590 } 4591 4592 surflevel = tex->surface.u.legacy.level; 4593 4594 if (tex->db_compatible) { 4595 if (!view->is_stencil_sampler) 4596 pipe_format = tex->db_render_format; 4597 4598 switch (pipe_format) { 4599 case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 4600 pipe_format = PIPE_FORMAT_Z32_FLOAT; 4601 break; 4602 case PIPE_FORMAT_X8Z24_UNORM: 4603 case PIPE_FORMAT_S8_UINT_Z24_UNORM: 4604 /* Z24 is always stored like this for DB 4605 * compatibility. 4606 */ 4607 pipe_format = PIPE_FORMAT_Z24X8_UNORM; 4608 break; 4609 case PIPE_FORMAT_X24S8_UINT: 4610 case PIPE_FORMAT_S8X24_UINT: 4611 case PIPE_FORMAT_X32_S8X24_UINT: 4612 pipe_format = PIPE_FORMAT_S8_UINT; 4613 surflevel = tex->surface.u.legacy.zs.stencil_level; 4614 break; 4615 default:; 4616 } 4617 } 4618 4619 view->dcc_incompatible = 4620 vi_dcc_formats_are_incompatible(texture, state->u.tex.first_level, state->format); 4621 4622 sctx->screen->make_texture_descriptor( 4623 sctx->screen, tex, true, state->target, pipe_format, state_swizzle, 4624 state->u.tex.first_level, state->u.tex.last_level, 4625 state->u.tex.first_layer, last_layer, texture->width0, texture->height0, texture->depth0, 4626 view->state, view->fmask_state); 4627 4628 view->base_level_info = &surflevel[0]; 4629 view->block_width = util_format_get_blockwidth(pipe_format); 4630 return &view->base; 4631} 4632 4633static void si_sampler_view_destroy(struct pipe_context *ctx, struct pipe_sampler_view *state) 4634{ 4635 struct si_sampler_view *view = (struct si_sampler_view *)state; 4636 4637 pipe_resource_reference(&state->texture, NULL); 4638 FREE_CL(view); 4639} 4640 4641static bool wrap_mode_uses_border_color(unsigned wrap, bool linear_filter) 4642{ 4643 return wrap == PIPE_TEX_WRAP_CLAMP_TO_BORDER || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER || 4644 
(linear_filter && (wrap == PIPE_TEX_WRAP_CLAMP || wrap == PIPE_TEX_WRAP_MIRROR_CLAMP)); 4645} 4646 4647static uint32_t si_translate_border_color(struct si_context *sctx, 4648 const struct pipe_sampler_state *state, 4649 const union pipe_color_union *color, bool is_integer) 4650{ 4651 bool linear_filter = state->min_img_filter != PIPE_TEX_FILTER_NEAREST || 4652 state->mag_img_filter != PIPE_TEX_FILTER_NEAREST; 4653 4654 if (!wrap_mode_uses_border_color(state->wrap_s, linear_filter) && 4655 !wrap_mode_uses_border_color(state->wrap_t, linear_filter) && 4656 !wrap_mode_uses_border_color(state->wrap_r, linear_filter)) 4657 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4658 4659#define simple_border_types(elt) \ 4660 do { \ 4661 if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 0) \ 4662 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); \ 4663 if (color->elt[0] == 0 && color->elt[1] == 0 && color->elt[2] == 0 && color->elt[3] == 1) \ 4664 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK); \ 4665 if (color->elt[0] == 1 && color->elt[1] == 1 && color->elt[2] == 1 && color->elt[3] == 1) \ 4666 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE); \ 4667 } while (false) 4668 4669 if (is_integer) 4670 simple_border_types(ui); 4671 else 4672 simple_border_types(f); 4673 4674#undef simple_border_types 4675 4676 int i; 4677 4678 /* Check if the border has been uploaded already. */ 4679 for (i = 0; i < sctx->border_color_count; i++) 4680 if (memcmp(&sctx->border_color_table[i], color, sizeof(*color)) == 0) 4681 break; 4682 4683 if (i >= SI_MAX_BORDER_COLORS) { 4684 /* Getting 4096 unique border colors is very unlikely. */ 4685 static bool printed; 4686 if (!printed) { 4687 fprintf(stderr, "radeonsi: The border color table is full. " 4688 "Any new border colors will be just black. 
" 4689 "This is a hardware limitation.\n"); 4690 printed = true; 4691 } 4692 return S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK); 4693 } 4694 4695 if (i == sctx->border_color_count) { 4696 /* Upload a new border color. */ 4697 memcpy(&sctx->border_color_table[i], color, sizeof(*color)); 4698 util_memcpy_cpu_to_le32(&sctx->border_color_map[i], color, sizeof(*color)); 4699 sctx->border_color_count++; 4700 } 4701 4702 return (sctx->screen->info.gfx_level >= GFX11 ? S_008F3C_BORDER_COLOR_PTR_GFX11(i): 4703 S_008F3C_BORDER_COLOR_PTR_GFX6(i)) | 4704 S_008F3C_BORDER_COLOR_TYPE(V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER); 4705} 4706 4707static inline int S_FIXED(float value, unsigned frac_bits) 4708{ 4709 return value * (1 << frac_bits); 4710} 4711 4712static inline unsigned si_tex_filter(unsigned filter, unsigned max_aniso) 4713{ 4714 if (filter == PIPE_TEX_FILTER_LINEAR) 4715 return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR 4716 : V_008F38_SQ_TEX_XY_FILTER_BILINEAR; 4717 else 4718 return max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT 4719 : V_008F38_SQ_TEX_XY_FILTER_POINT; 4720} 4721 4722static inline unsigned si_tex_aniso_filter(unsigned filter) 4723{ 4724 if (filter < 2) 4725 return 0; 4726 if (filter < 4) 4727 return 1; 4728 if (filter < 8) 4729 return 2; 4730 if (filter < 16) 4731 return 3; 4732 return 4; 4733} 4734 4735static void *si_create_sampler_state(struct pipe_context *ctx, 4736 const struct pipe_sampler_state *state) 4737{ 4738 struct si_context *sctx = (struct si_context *)ctx; 4739 struct si_screen *sscreen = sctx->screen; 4740 struct si_sampler_state *rstate = CALLOC_STRUCT(si_sampler_state); 4741 unsigned max_aniso = sscreen->force_aniso >= 0 ? 
sscreen->force_aniso : state->max_anisotropy; 4742 unsigned max_aniso_ratio = si_tex_aniso_filter(max_aniso); 4743 bool trunc_coord = state->min_img_filter == PIPE_TEX_FILTER_NEAREST && 4744 state->mag_img_filter == PIPE_TEX_FILTER_NEAREST && 4745 state->compare_mode == PIPE_TEX_COMPARE_NONE; 4746 union pipe_color_union clamped_border_color; 4747 4748 if (!rstate) { 4749 return NULL; 4750 } 4751 4752 /* Validate inputs. */ 4753 if (!is_wrap_mode_legal(sscreen, state->wrap_s) || 4754 !is_wrap_mode_legal(sscreen, state->wrap_t) || 4755 !is_wrap_mode_legal(sscreen, state->wrap_r) || 4756 (!sscreen->info.has_3d_cube_border_color_mipmap && 4757 (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE || 4758 state->max_anisotropy > 0))) { 4759 assert(0); 4760 return NULL; 4761 } 4762 4763#ifndef NDEBUG 4764 rstate->magic = SI_SAMPLER_STATE_MAGIC; 4765#endif 4766 rstate->val[0] = 4767 (S_008F30_CLAMP_X(si_tex_wrap(state->wrap_s)) | S_008F30_CLAMP_Y(si_tex_wrap(state->wrap_t)) | 4768 S_008F30_CLAMP_Z(si_tex_wrap(state->wrap_r)) | S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | 4769 S_008F30_DEPTH_COMPARE_FUNC(si_tex_compare(state->compare_func)) | 4770 S_008F30_FORCE_UNNORMALIZED(!state->normalized_coords) | 4771 S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) | 4772 S_008F30_DISABLE_CUBE_WRAP(!state->seamless_cube_map) | 4773 S_008F30_TRUNC_COORD(trunc_coord) | 4774 S_008F30_COMPAT_MODE(sctx->gfx_level == GFX8 || sctx->gfx_level == GFX9)); 4775 rstate->val[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(state->min_lod, 0, 15), 8)) | 4776 S_008F34_MAX_LOD(S_FIXED(CLAMP(state->max_lod, 0, 15), 8)) | 4777 S_008F34_PERF_MIP(max_aniso_ratio ? 
max_aniso_ratio + 6 : 0)); 4778 rstate->val[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(state->lod_bias, -16, 16), 8)) | 4779 S_008F38_XY_MAG_FILTER(si_tex_filter(state->mag_img_filter, max_aniso)) | 4780 S_008F38_XY_MIN_FILTER(si_tex_filter(state->min_img_filter, max_aniso)) | 4781 S_008F38_MIP_FILTER(si_tex_mipfilter(state->min_mip_filter))); 4782 rstate->val[3] = si_translate_border_color(sctx, state, &state->border_color, 4783 state->border_color_is_integer); 4784 4785 if (sscreen->info.gfx_level >= GFX10) { 4786 rstate->val[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1); 4787 } else { 4788 rstate->val[2] |= S_008F38_DISABLE_LSB_CEIL(sctx->gfx_level <= GFX8) | 4789 S_008F38_FILTER_PREC_FIX(1) | 4790 S_008F38_ANISO_OVERRIDE_GFX8(sctx->gfx_level >= GFX8); 4791 } 4792 4793 /* Create sampler resource for upgraded depth textures. */ 4794 memcpy(rstate->upgraded_depth_val, rstate->val, sizeof(rstate->val)); 4795 4796 for (unsigned i = 0; i < 4; ++i) { 4797 /* Use channel 0 on purpose, so that we can use OPAQUE_WHITE 4798 * when the border color is 1.0. 
*/ 4799 clamped_border_color.f[i] = CLAMP(state->border_color.f[0], 0, 1); 4800 } 4801 4802 if (memcmp(&state->border_color, &clamped_border_color, sizeof(clamped_border_color)) == 0) { 4803 if (sscreen->info.gfx_level <= GFX9) 4804 rstate->upgraded_depth_val[3] |= S_008F3C_UPGRADED_DEPTH(1); 4805 } else { 4806 rstate->upgraded_depth_val[3] = 4807 si_translate_border_color(sctx, state, &clamped_border_color, false); 4808 } 4809 4810 return rstate; 4811} 4812 4813static void si_set_sample_mask(struct pipe_context *ctx, unsigned sample_mask) 4814{ 4815 struct si_context *sctx = (struct si_context *)ctx; 4816 4817 if (sctx->sample_mask == (uint16_t)sample_mask) 4818 return; 4819 4820 sctx->sample_mask = sample_mask; 4821 si_mark_atom_dirty(sctx, &sctx->atoms.s.sample_mask); 4822} 4823 4824static void si_emit_sample_mask(struct si_context *sctx) 4825{ 4826 struct radeon_cmdbuf *cs = &sctx->gfx_cs; 4827 unsigned mask = sctx->sample_mask; 4828 4829 /* Needed for line and polygon smoothing as well as for the Polaris 4830 * small primitive filter. We expect the gallium frontend to take care of 4831 * this for us. 
4832 */ 4833 assert(mask == 0xffff || sctx->framebuffer.nr_samples > 1 || 4834 (mask & 1 && sctx->blitter_running)); 4835 4836 radeon_begin(cs); 4837 radeon_set_context_reg_seq(R_028C38_PA_SC_AA_MASK_X0Y0_X1Y0, 2); 4838 radeon_emit(mask | (mask << 16)); 4839 radeon_emit(mask | (mask << 16)); 4840 radeon_end(); 4841} 4842 4843static void si_delete_sampler_state(struct pipe_context *ctx, void *state) 4844{ 4845#ifndef NDEBUG 4846 struct si_sampler_state *s = state; 4847 4848 assert(s->magic == SI_SAMPLER_STATE_MAGIC); 4849 s->magic = 0; 4850#endif 4851 free(state); 4852} 4853 4854/* 4855 * Vertex elements & buffers 4856 */ 4857 4858struct si_fast_udiv_info32 si_compute_fast_udiv_info32(uint32_t D, unsigned num_bits) 4859{ 4860 struct util_fast_udiv_info info = util_compute_fast_udiv_info(D, num_bits, 32); 4861 4862 struct si_fast_udiv_info32 result = { 4863 info.multiplier, 4864 info.pre_shift, 4865 info.post_shift, 4866 info.increment, 4867 }; 4868 return result; 4869} 4870 4871static void *si_create_vertex_elements(struct pipe_context *ctx, unsigned count, 4872 const struct pipe_vertex_element *elements) 4873{ 4874 struct si_screen *sscreen = (struct si_screen *)ctx->screen; 4875 struct si_vertex_elements *v = CALLOC_STRUCT(si_vertex_elements); 4876 bool used[SI_NUM_VERTEX_BUFFERS] = {}; 4877 struct si_fast_udiv_info32 divisor_factors[SI_MAX_ATTRIBS] = {}; 4878 STATIC_ASSERT(sizeof(struct si_fast_udiv_info32) == 16); 4879 STATIC_ASSERT(sizeof(divisor_factors[0].multiplier) == 4); 4880 STATIC_ASSERT(sizeof(divisor_factors[0].pre_shift) == 4); 4881 STATIC_ASSERT(sizeof(divisor_factors[0].post_shift) == 4); 4882 STATIC_ASSERT(sizeof(divisor_factors[0].increment) == 4); 4883 int i; 4884 4885 assert(count <= SI_MAX_ATTRIBS); 4886 if (!v) 4887 return NULL; 4888 4889 v->count = count; 4890 4891 unsigned num_vbos_in_user_sgprs = si_num_vbos_in_user_sgprs(sscreen); 4892 unsigned alloc_count = 4893 count > num_vbos_in_user_sgprs ? 
count - num_vbos_in_user_sgprs : 0; 4894 v->vb_desc_list_alloc_size = align(alloc_count * 16, SI_CPDMA_ALIGNMENT); 4895 4896 for (i = 0; i < count; ++i) { 4897 const struct util_format_description *desc; 4898 const struct util_format_channel_description *channel; 4899 int first_non_void; 4900 unsigned vbo_index = elements[i].vertex_buffer_index; 4901 4902 if (vbo_index >= SI_NUM_VERTEX_BUFFERS) { 4903 FREE(v); 4904 return NULL; 4905 } 4906 4907 unsigned instance_divisor = elements[i].instance_divisor; 4908 if (instance_divisor) { 4909 if (instance_divisor == 1) { 4910 v->instance_divisor_is_one |= 1u << i; 4911 } else { 4912 v->instance_divisor_is_fetched |= 1u << i; 4913 divisor_factors[i] = si_compute_fast_udiv_info32(instance_divisor, 32); 4914 } 4915 } 4916 4917 if (!used[vbo_index]) { 4918 v->first_vb_use_mask |= 1 << i; 4919 used[vbo_index] = true; 4920 } 4921 4922 desc = util_format_description(elements[i].src_format); 4923 first_non_void = util_format_get_first_non_void_channel(elements[i].src_format); 4924 channel = first_non_void >= 0 ? 
&desc->channel[first_non_void] : NULL; 4925 4926 v->format_size[i] = desc->block.bits / 8; 4927 v->src_offset[i] = elements[i].src_offset; 4928 v->vertex_buffer_index[i] = vbo_index; 4929 4930 bool always_fix = false; 4931 union si_vs_fix_fetch fix_fetch; 4932 unsigned log_hw_load_size; /* the load element size as seen by the hardware */ 4933 4934 fix_fetch.bits = 0; 4935 log_hw_load_size = MIN2(2, util_logbase2(desc->block.bits) - 3); 4936 4937 if (channel) { 4938 switch (channel->type) { 4939 case UTIL_FORMAT_TYPE_FLOAT: 4940 fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT; 4941 break; 4942 case UTIL_FORMAT_TYPE_FIXED: 4943 fix_fetch.u.format = AC_FETCH_FORMAT_FIXED; 4944 break; 4945 case UTIL_FORMAT_TYPE_SIGNED: { 4946 if (channel->pure_integer) 4947 fix_fetch.u.format = AC_FETCH_FORMAT_SINT; 4948 else if (channel->normalized) 4949 fix_fetch.u.format = AC_FETCH_FORMAT_SNORM; 4950 else 4951 fix_fetch.u.format = AC_FETCH_FORMAT_SSCALED; 4952 break; 4953 } 4954 case UTIL_FORMAT_TYPE_UNSIGNED: { 4955 if (channel->pure_integer) 4956 fix_fetch.u.format = AC_FETCH_FORMAT_UINT; 4957 else if (channel->normalized) 4958 fix_fetch.u.format = AC_FETCH_FORMAT_UNORM; 4959 else 4960 fix_fetch.u.format = AC_FETCH_FORMAT_USCALED; 4961 break; 4962 } 4963 default: 4964 unreachable("bad format type"); 4965 } 4966 } else { 4967 switch (elements[i].src_format) { 4968 case PIPE_FORMAT_R11G11B10_FLOAT: 4969 fix_fetch.u.format = AC_FETCH_FORMAT_FLOAT; 4970 break; 4971 default: 4972 unreachable("bad other format"); 4973 } 4974 } 4975 4976 if (desc->channel[0].size == 10) { 4977 fix_fetch.u.log_size = 3; /* special encoding for 2_10_10_10 */ 4978 log_hw_load_size = 2; 4979 4980 /* The hardware always treats the 2-bit alpha channel as 4981 * unsigned, so a shader workaround is needed. The affected 4982 * chips are GFX8 and older except Stoney (GFX8.1). 
4983 */ 4984 always_fix = sscreen->info.gfx_level <= GFX8 && sscreen->info.family != CHIP_STONEY && 4985 channel->type == UTIL_FORMAT_TYPE_SIGNED; 4986 } else if (elements[i].src_format == PIPE_FORMAT_R11G11B10_FLOAT) { 4987 fix_fetch.u.log_size = 3; /* special encoding */ 4988 fix_fetch.u.format = AC_FETCH_FORMAT_FIXED; 4989 log_hw_load_size = 2; 4990 } else { 4991 fix_fetch.u.log_size = util_logbase2(channel->size) - 3; 4992 fix_fetch.u.num_channels_m1 = desc->nr_channels - 1; 4993 4994 /* Always fix up: 4995 * - doubles (multiple loads + truncate to float) 4996 * - 32-bit requiring a conversion 4997 */ 4998 always_fix = (fix_fetch.u.log_size == 3) || 4999 (fix_fetch.u.log_size == 2 && fix_fetch.u.format != AC_FETCH_FORMAT_FLOAT && 5000 fix_fetch.u.format != AC_FETCH_FORMAT_UINT && 5001 fix_fetch.u.format != AC_FETCH_FORMAT_SINT); 5002 5003 /* Also fixup 8_8_8 and 16_16_16. */ 5004 if (desc->nr_channels == 3 && fix_fetch.u.log_size <= 1) { 5005 always_fix = true; 5006 log_hw_load_size = fix_fetch.u.log_size; 5007 } 5008 } 5009 5010 if (desc->swizzle[0] != PIPE_SWIZZLE_X) { 5011 assert(desc->swizzle[0] == PIPE_SWIZZLE_Z && 5012 (desc->swizzle[2] == PIPE_SWIZZLE_X || desc->swizzle[2] == PIPE_SWIZZLE_0)); 5013 fix_fetch.u.reverse = 1; 5014 } 5015 5016 /* Force the workaround for unaligned access here already if the 5017 * offset relative to the vertex buffer base is unaligned. 5018 * 5019 * There is a theoretical case in which this is too conservative: 5020 * if the vertex buffer's offset is also unaligned in just the 5021 * right way, we end up with an aligned address after all. 5022 * However, this case should be extremely rare in practice (it 5023 * won't happen in well-behaved applications), and taking it 5024 * into account would complicate the fast path (where everything 5025 * is nicely aligned). 
5026 */ 5027 bool check_alignment = 5028 log_hw_load_size >= 1 && 5029 (sscreen->info.gfx_level == GFX6 || sscreen->info.gfx_level >= GFX10); 5030 bool opencode = sscreen->options.vs_fetch_always_opencode; 5031 5032 if (check_alignment && (elements[i].src_offset & ((1 << log_hw_load_size) - 1)) != 0) 5033 opencode = true; 5034 5035 if (always_fix || check_alignment || opencode) 5036 v->fix_fetch[i] = fix_fetch.bits; 5037 5038 if (opencode) 5039 v->fix_fetch_opencode |= 1 << i; 5040 if (opencode || always_fix) 5041 v->fix_fetch_always |= 1 << i; 5042 5043 if (check_alignment && !opencode) { 5044 assert(log_hw_load_size == 1 || log_hw_load_size == 2); 5045 5046 v->fix_fetch_unaligned |= 1 << i; 5047 v->hw_load_is_dword |= (log_hw_load_size - 1) << i; 5048 v->vb_alignment_check_mask |= 1 << vbo_index; 5049 } 5050 5051 v->rsrc_word3[i] = S_008F0C_DST_SEL_X(si_map_swizzle(desc->swizzle[0])) | 5052 S_008F0C_DST_SEL_Y(si_map_swizzle(desc->swizzle[1])) | 5053 S_008F0C_DST_SEL_Z(si_map_swizzle(desc->swizzle[2])) | 5054 S_008F0C_DST_SEL_W(si_map_swizzle(desc->swizzle[3])); 5055 5056 if (sscreen->info.gfx_level >= GFX10) { 5057 const struct gfx10_format *fmt = &ac_get_gfx10_format_table(&sscreen->info)[elements[i].src_format]; 5058 assert(fmt->img_format != 0 && fmt->img_format < 128); 5059 v->rsrc_word3[i] |= S_008F0C_FORMAT(fmt->img_format) | 5060 S_008F0C_RESOURCE_LEVEL(sscreen->info.gfx_level < GFX11); 5061 } else { 5062 unsigned data_format, num_format; 5063 data_format = si_translate_buffer_dataformat(ctx->screen, desc, first_non_void); 5064 num_format = si_translate_buffer_numformat(ctx->screen, desc, first_non_void); 5065 v->rsrc_word3[i] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format); 5066 } 5067 } 5068 5069 if (v->instance_divisor_is_fetched) { 5070 unsigned num_divisors = util_last_bit(v->instance_divisor_is_fetched); 5071 5072 v->instance_divisor_factor_buffer = (struct si_resource *)pipe_buffer_create( 5073 &sscreen->b, 0, 
PIPE_USAGE_DEFAULT, num_divisors * sizeof(divisor_factors[0])); 5074 if (!v->instance_divisor_factor_buffer) { 5075 FREE(v); 5076 return NULL; 5077 } 5078 void *map = 5079 sscreen->ws->buffer_map(sscreen->ws, v->instance_divisor_factor_buffer->buf, NULL, PIPE_MAP_WRITE); 5080 memcpy(map, divisor_factors, num_divisors * sizeof(divisor_factors[0])); 5081 } 5082 return v; 5083} 5084 5085static void si_bind_vertex_elements(struct pipe_context *ctx, void *state) 5086{ 5087 struct si_context *sctx = (struct si_context *)ctx; 5088 struct si_vertex_elements *old = sctx->vertex_elements; 5089 struct si_vertex_elements *v = (struct si_vertex_elements *)state; 5090 5091 if (!v) 5092 v = sctx->no_velems_state; 5093 5094 sctx->vertex_elements = v; 5095 sctx->num_vertex_elements = v->count; 5096 5097 if (sctx->num_vertex_elements) { 5098 sctx->vertex_buffers_dirty = true; 5099 } else { 5100 sctx->vertex_buffers_dirty = false; 5101 sctx->vertex_buffer_pointer_dirty = false; 5102 sctx->vertex_buffer_user_sgprs_dirty = false; 5103 } 5104 5105 if (old->instance_divisor_is_one != v->instance_divisor_is_one || 5106 old->instance_divisor_is_fetched != v->instance_divisor_is_fetched || 5107 (old->vb_alignment_check_mask ^ v->vb_alignment_check_mask) & 5108 sctx->vertex_buffer_unaligned || 5109 ((v->vb_alignment_check_mask & sctx->vertex_buffer_unaligned) && 5110 memcmp(old->vertex_buffer_index, v->vertex_buffer_index, 5111 sizeof(v->vertex_buffer_index[0]) * MAX2(old->count, v->count))) || 5112 /* fix_fetch_{always,opencode,unaligned} and hw_load_is_dword are 5113 * functions of fix_fetch and the src_offset alignment. 5114 * If they change and fix_fetch doesn't, it must be due to different 5115 * src_offset alignment, which is reflected in fix_fetch_opencode. 
*/ 5116 old->fix_fetch_opencode != v->fix_fetch_opencode || 5117 memcmp(old->fix_fetch, v->fix_fetch, sizeof(v->fix_fetch[0]) * 5118 MAX2(old->count, v->count))) { 5119 si_vs_key_update_inputs(sctx); 5120 sctx->do_update_shaders = true; 5121 } 5122 5123 if (v->instance_divisor_is_fetched) { 5124 struct pipe_constant_buffer cb; 5125 5126 cb.buffer = &v->instance_divisor_factor_buffer->b.b; 5127 cb.user_buffer = NULL; 5128 cb.buffer_offset = 0; 5129 cb.buffer_size = 0xffffffff; 5130 si_set_internal_const_buffer(sctx, SI_VS_CONST_INSTANCE_DIVISORS, &cb); 5131 } 5132} 5133 5134static void si_delete_vertex_element(struct pipe_context *ctx, void *state) 5135{ 5136 struct si_context *sctx = (struct si_context *)ctx; 5137 struct si_vertex_elements *v = (struct si_vertex_elements *)state; 5138 5139 if (sctx->vertex_elements == state) 5140 si_bind_vertex_elements(ctx, sctx->no_velems_state); 5141 5142 si_resource_reference(&v->instance_divisor_factor_buffer, NULL); 5143 FREE(state); 5144} 5145 5146static void si_set_vertex_buffers(struct pipe_context *ctx, unsigned start_slot, unsigned count, 5147 unsigned unbind_num_trailing_slots, bool take_ownership, 5148 const struct pipe_vertex_buffer *buffers) 5149{ 5150 struct si_context *sctx = (struct si_context *)ctx; 5151 struct pipe_vertex_buffer *dst = sctx->vertex_buffer + start_slot; 5152 unsigned updated_mask = u_bit_consecutive(start_slot, count + unbind_num_trailing_slots); 5153 uint32_t orig_unaligned = sctx->vertex_buffer_unaligned; 5154 uint32_t unaligned = 0; 5155 int i; 5156 5157 assert(start_slot + count + unbind_num_trailing_slots <= ARRAY_SIZE(sctx->vertex_buffer)); 5158 5159 if (buffers) { 5160 if (take_ownership) { 5161 for (i = 0; i < count; i++) { 5162 const struct pipe_vertex_buffer *src = buffers + i; 5163 struct pipe_vertex_buffer *dsti = dst + i; 5164 struct pipe_resource *buf = src->buffer.resource; 5165 unsigned slot_bit = 1 << (start_slot + i); 5166 5167 /* Only unreference bound vertex buffers. 
(take_ownership) */ 5168 pipe_resource_reference(&dsti->buffer.resource, NULL); 5169 5170 if (src->buffer_offset & 3 || src->stride & 3) 5171 unaligned |= slot_bit; 5172 5173 si_context_add_resource_size(sctx, buf); 5174 if (buf) 5175 si_resource(buf)->bind_history |= SI_BIND_VERTEX_BUFFER; 5176 } 5177 /* take_ownership allows us to copy pipe_resource pointers without refcounting. */ 5178 memcpy(dst, buffers, count * sizeof(struct pipe_vertex_buffer)); 5179 } else { 5180 for (i = 0; i < count; i++) { 5181 const struct pipe_vertex_buffer *src = buffers + i; 5182 struct pipe_vertex_buffer *dsti = dst + i; 5183 struct pipe_resource *buf = src->buffer.resource; 5184 unsigned slot_bit = 1 << (start_slot + i); 5185 5186 pipe_resource_reference(&dsti->buffer.resource, buf); 5187 dsti->buffer_offset = src->buffer_offset; 5188 dsti->stride = src->stride; 5189 5190 if (dsti->buffer_offset & 3 || dsti->stride & 3) 5191 unaligned |= slot_bit; 5192 5193 si_context_add_resource_size(sctx, buf); 5194 if (buf) 5195 si_resource(buf)->bind_history |= SI_BIND_VERTEX_BUFFER; 5196 } 5197 } 5198 } else { 5199 for (i = 0; i < count; i++) 5200 pipe_resource_reference(&dst[i].buffer.resource, NULL); 5201 } 5202 5203 for (i = 0; i < unbind_num_trailing_slots; i++) 5204 pipe_resource_reference(&dst[count + i].buffer.resource, NULL); 5205 5206 sctx->vertex_buffers_dirty = sctx->num_vertex_elements > 0; 5207 sctx->vertex_buffer_unaligned = (orig_unaligned & ~updated_mask) | unaligned; 5208 5209 /* Check whether alignment may have changed in a way that requires 5210 * shader changes. This check is conservative: a vertex buffer can only 5211 * trigger a shader change if the misalignment amount changes (e.g. 5212 * from byte-aligned to short-aligned), but we only keep track of 5213 * whether buffers are at least dword-aligned, since that should always 5214 * be the case in well-behaved applications anyway. 
5215 */ 5216 if ((sctx->vertex_elements->vb_alignment_check_mask & 5217 (unaligned | orig_unaligned) & updated_mask)) { 5218 si_vs_key_update_inputs(sctx); 5219 sctx->do_update_shaders = true; 5220 } 5221} 5222 5223static struct pipe_vertex_state * 5224si_create_vertex_state(struct pipe_screen *screen, 5225 struct pipe_vertex_buffer *buffer, 5226 const struct pipe_vertex_element *elements, 5227 unsigned num_elements, 5228 struct pipe_resource *indexbuf, 5229 uint32_t full_velem_mask) 5230{ 5231 struct si_screen *sscreen = (struct si_screen *)screen; 5232 struct si_vertex_state *state = CALLOC_STRUCT(si_vertex_state); 5233 5234 util_init_pipe_vertex_state(screen, buffer, elements, num_elements, indexbuf, full_velem_mask, 5235 &state->b); 5236 5237 /* Initialize the vertex element state in state->element. 5238 * Do it by creating a vertex element state object and copying it there. 5239 */ 5240 struct si_context ctx = {}; 5241 ctx.b.screen = screen; 5242 struct si_vertex_elements *velems = si_create_vertex_elements(&ctx.b, num_elements, elements); 5243 state->velems = *velems; 5244 si_delete_vertex_element(&ctx.b, velems); 5245 5246 assert(!state->velems.instance_divisor_is_one); 5247 assert(!state->velems.instance_divisor_is_fetched); 5248 assert(!state->velems.fix_fetch_always); 5249 assert(buffer->stride % 4 == 0); 5250 assert(buffer->buffer_offset % 4 == 0); 5251 assert(!buffer->is_user_buffer); 5252 for (unsigned i = 0; i < num_elements; i++) { 5253 assert(elements[i].src_offset % 4 == 0); 5254 assert(!elements[i].dual_slot); 5255 } 5256 5257 for (unsigned i = 0; i < num_elements; i++) { 5258 si_set_vertex_buffer_descriptor(sscreen, &state->velems, &state->b.input.vbuffer, i, 5259 &state->descriptors[i * 4]); 5260 } 5261 5262 return &state->b; 5263} 5264 5265static void si_vertex_state_destroy(struct pipe_screen *screen, 5266 struct pipe_vertex_state *state) 5267{ 5268 pipe_vertex_buffer_unreference(&state->input.vbuffer); 5269 
pipe_resource_reference(&state->input.indexbuf, NULL); 5270 FREE(state); 5271} 5272 5273static struct pipe_vertex_state * 5274si_pipe_create_vertex_state(struct pipe_screen *screen, 5275 struct pipe_vertex_buffer *buffer, 5276 const struct pipe_vertex_element *elements, 5277 unsigned num_elements, 5278 struct pipe_resource *indexbuf, 5279 uint32_t full_velem_mask) 5280{ 5281 struct si_screen *sscreen = (struct si_screen *)screen; 5282 5283 return util_vertex_state_cache_get(screen, buffer, elements, num_elements, indexbuf, 5284 full_velem_mask, &sscreen->vertex_state_cache); 5285} 5286 5287static void si_pipe_vertex_state_destroy(struct pipe_screen *screen, 5288 struct pipe_vertex_state *state) 5289{ 5290 struct si_screen *sscreen = (struct si_screen *)screen; 5291 5292 util_vertex_state_destroy(screen, &sscreen->vertex_state_cache, state); 5293} 5294 5295/* 5296 * Misc 5297 */ 5298 5299static void si_set_tess_state(struct pipe_context *ctx, const float default_outer_level[4], 5300 const float default_inner_level[2]) 5301{ 5302 struct si_context *sctx = (struct si_context *)ctx; 5303 struct pipe_constant_buffer cb; 5304 float array[8]; 5305 5306 memcpy(array, default_outer_level, sizeof(float) * 4); 5307 memcpy(array + 4, default_inner_level, sizeof(float) * 2); 5308 5309 cb.buffer = NULL; 5310 cb.user_buffer = array; 5311 cb.buffer_offset = 0; 5312 cb.buffer_size = sizeof(array); 5313 5314 si_set_internal_const_buffer(sctx, SI_HS_CONST_DEFAULT_TESS_LEVELS, &cb); 5315} 5316 5317static void si_set_patch_vertices(struct pipe_context *ctx, uint8_t patch_vertices) 5318{ 5319 struct si_context *sctx = (struct si_context *)ctx; 5320 5321 sctx->patch_vertices = patch_vertices; 5322} 5323 5324static void si_texture_barrier(struct pipe_context *ctx, unsigned flags) 5325{ 5326 struct si_context *sctx = (struct si_context *)ctx; 5327 5328 si_update_fb_dirtiness_after_rendering(sctx); 5329 5330 /* Multisample surfaces are flushed in si_decompress_textures. 
*/ 5331 if (sctx->framebuffer.uncompressed_cb_mask) { 5332 si_make_CB_shader_coherent(sctx, sctx->framebuffer.nr_samples, 5333 sctx->framebuffer.CB_has_shader_readable_metadata, 5334 sctx->framebuffer.all_DCC_pipe_aligned); 5335 } 5336} 5337 5338/* This only ensures coherency for shader image/buffer stores. */ 5339static void si_memory_barrier(struct pipe_context *ctx, unsigned flags) 5340{ 5341 struct si_context *sctx = (struct si_context *)ctx; 5342 5343 if (!(flags & ~PIPE_BARRIER_UPDATE)) 5344 return; 5345 5346 /* Subsequent commands must wait for all shader invocations to 5347 * complete. */ 5348 sctx->flags |= SI_CONTEXT_PS_PARTIAL_FLUSH | SI_CONTEXT_CS_PARTIAL_FLUSH | 5349 SI_CONTEXT_PFP_SYNC_ME; 5350 5351 if (flags & PIPE_BARRIER_CONSTANT_BUFFER) 5352 sctx->flags |= SI_CONTEXT_INV_SCACHE | SI_CONTEXT_INV_VCACHE; 5353 5354 if (flags & (PIPE_BARRIER_VERTEX_BUFFER | PIPE_BARRIER_SHADER_BUFFER | PIPE_BARRIER_TEXTURE | 5355 PIPE_BARRIER_IMAGE | PIPE_BARRIER_STREAMOUT_BUFFER | PIPE_BARRIER_GLOBAL_BUFFER)) { 5356 /* As far as I can tell, L1 contents are written back to L2 5357 * automatically at end of shader, but the contents of other 5358 * L1 caches might still be stale. */ 5359 sctx->flags |= SI_CONTEXT_INV_VCACHE; 5360 5361 if (flags & (PIPE_BARRIER_IMAGE | PIPE_BARRIER_TEXTURE) && 5362 sctx->screen->info.tcc_rb_non_coherent) 5363 sctx->flags |= SI_CONTEXT_INV_L2; 5364 } 5365 5366 if (flags & PIPE_BARRIER_INDEX_BUFFER) { 5367 /* Indices are read through TC L2 since GFX8. 5368 * L1 isn't used. 5369 */ 5370 if (sctx->screen->info.gfx_level <= GFX7) 5371 sctx->flags |= SI_CONTEXT_WB_L2; 5372 } 5373 5374 /* MSAA color, any depth and any stencil are flushed in 5375 * si_decompress_textures when needed. 
5376 */ 5377 if (flags & PIPE_BARRIER_FRAMEBUFFER && sctx->framebuffer.uncompressed_cb_mask) { 5378 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_CB; 5379 5380 if (sctx->gfx_level <= GFX8) 5381 sctx->flags |= SI_CONTEXT_WB_L2; 5382 } 5383 5384 /* Indirect buffers use TC L2 on GFX9, but not older hw. */ 5385 if (sctx->screen->info.gfx_level <= GFX8 && flags & PIPE_BARRIER_INDIRECT_BUFFER) 5386 sctx->flags |= SI_CONTEXT_WB_L2; 5387} 5388 5389static void *si_create_blend_custom(struct si_context *sctx, unsigned mode) 5390{ 5391 struct pipe_blend_state blend; 5392 5393 memset(&blend, 0, sizeof(blend)); 5394 blend.independent_blend_enable = true; 5395 blend.rt[0].colormask = 0xf; 5396 return si_create_blend_state_mode(&sctx->b, &blend, mode); 5397} 5398 5399void si_init_state_compute_functions(struct si_context *sctx) 5400{ 5401 sctx->b.create_sampler_state = si_create_sampler_state; 5402 sctx->b.delete_sampler_state = si_delete_sampler_state; 5403 sctx->b.create_sampler_view = si_create_sampler_view; 5404 sctx->b.sampler_view_destroy = si_sampler_view_destroy; 5405 sctx->b.memory_barrier = si_memory_barrier; 5406} 5407 5408void si_init_state_functions(struct si_context *sctx) 5409{ 5410 sctx->atoms.s.framebuffer.emit = si_emit_framebuffer_state; 5411 sctx->atoms.s.msaa_sample_locs.emit = si_emit_msaa_sample_locs; 5412 sctx->atoms.s.db_render_state.emit = si_emit_db_render_state; 5413 sctx->atoms.s.dpbb_state.emit = si_emit_dpbb_state; 5414 sctx->atoms.s.msaa_config.emit = si_emit_msaa_config; 5415 sctx->atoms.s.sample_mask.emit = si_emit_sample_mask; 5416 sctx->atoms.s.cb_render_state.emit = si_emit_cb_render_state; 5417 sctx->atoms.s.blend_color.emit = si_emit_blend_color; 5418 sctx->atoms.s.clip_regs.emit = si_emit_clip_regs; 5419 sctx->atoms.s.clip_state.emit = si_emit_clip_state; 5420 sctx->atoms.s.stencil_ref.emit = si_emit_stencil_ref; 5421 5422 sctx->b.create_blend_state = si_create_blend_state; 5423 sctx->b.bind_blend_state = si_bind_blend_state; 5424 
sctx->b.delete_blend_state = si_delete_blend_state; 5425 sctx->b.set_blend_color = si_set_blend_color; 5426 5427 sctx->b.create_rasterizer_state = si_create_rs_state; 5428 sctx->b.bind_rasterizer_state = si_bind_rs_state; 5429 sctx->b.delete_rasterizer_state = si_delete_rs_state; 5430 5431 sctx->b.create_depth_stencil_alpha_state = si_create_dsa_state; 5432 sctx->b.bind_depth_stencil_alpha_state = si_bind_dsa_state; 5433 sctx->b.delete_depth_stencil_alpha_state = si_delete_dsa_state; 5434 5435 sctx->custom_dsa_flush = si_create_db_flush_dsa(sctx); 5436 5437 if (sctx->gfx_level < GFX11) { 5438 sctx->custom_blend_resolve = si_create_blend_custom(sctx, V_028808_CB_RESOLVE); 5439 sctx->custom_blend_fmask_decompress = si_create_blend_custom(sctx, V_028808_CB_FMASK_DECOMPRESS); 5440 sctx->custom_blend_eliminate_fastclear = 5441 si_create_blend_custom(sctx, V_028808_CB_ELIMINATE_FAST_CLEAR); 5442 } 5443 5444 sctx->custom_blend_dcc_decompress = 5445 si_create_blend_custom(sctx, sctx->gfx_level >= GFX11 ? 
5446 V_028808_CB_DCC_DECOMPRESS_GFX11 : 5447 V_028808_CB_DCC_DECOMPRESS_GFX8); 5448 5449 sctx->b.set_clip_state = si_set_clip_state; 5450 sctx->b.set_stencil_ref = si_set_stencil_ref; 5451 5452 sctx->b.set_framebuffer_state = si_set_framebuffer_state; 5453 5454 sctx->b.set_sample_mask = si_set_sample_mask; 5455 5456 sctx->b.create_vertex_elements_state = si_create_vertex_elements; 5457 sctx->b.bind_vertex_elements_state = si_bind_vertex_elements; 5458 sctx->b.delete_vertex_elements_state = si_delete_vertex_element; 5459 sctx->b.set_vertex_buffers = si_set_vertex_buffers; 5460 5461 sctx->b.texture_barrier = si_texture_barrier; 5462 sctx->b.set_min_samples = si_set_min_samples; 5463 sctx->b.set_tess_state = si_set_tess_state; 5464 sctx->b.set_patch_vertices = si_set_patch_vertices; 5465 5466 sctx->b.set_active_query_state = si_set_active_query_state; 5467} 5468 5469void si_init_screen_state_functions(struct si_screen *sscreen) 5470{ 5471 sscreen->b.is_format_supported = si_is_format_supported; 5472 sscreen->b.create_vertex_state = si_pipe_create_vertex_state; 5473 sscreen->b.vertex_state_destroy = si_pipe_vertex_state_destroy; 5474 5475 if (sscreen->info.gfx_level >= GFX10) { 5476 sscreen->make_texture_descriptor = gfx10_make_texture_descriptor; 5477 } else { 5478 sscreen->make_texture_descriptor = si_make_texture_descriptor; 5479 } 5480 5481 util_vertex_state_cache_init(&sscreen->vertex_state_cache, 5482 si_create_vertex_state, si_vertex_state_destroy); 5483} 5484 5485static void si_set_grbm_gfx_index(struct si_context *sctx, struct si_pm4_state *pm4, unsigned value) 5486{ 5487 unsigned reg = sctx->gfx_level >= GFX7 ? R_030800_GRBM_GFX_INDEX : R_00802C_GRBM_GFX_INDEX; 5488 si_pm4_set_reg(pm4, reg, value); 5489} 5490 5491static void si_set_grbm_gfx_index_se(struct si_context *sctx, struct si_pm4_state *pm4, unsigned se) 5492{ 5493 assert(se == ~0 || se < sctx->screen->info.max_se); 5494 si_set_grbm_gfx_index(sctx, pm4, 5495 (se == ~0 ? 
S_030800_SE_BROADCAST_WRITES(1) : S_030800_SE_INDEX(se)) | 5496 S_030800_SH_BROADCAST_WRITES(1) | 5497 S_030800_INSTANCE_BROADCAST_WRITES(1)); 5498} 5499 5500static void si_write_harvested_raster_configs(struct si_context *sctx, struct si_pm4_state *pm4, 5501 unsigned raster_config, unsigned raster_config_1) 5502{ 5503 unsigned num_se = MAX2(sctx->screen->info.max_se, 1); 5504 unsigned raster_config_se[4]; 5505 unsigned se; 5506 5507 ac_get_harvested_configs(&sctx->screen->info, raster_config, &raster_config_1, raster_config_se); 5508 5509 for (se = 0; se < num_se; se++) { 5510 si_set_grbm_gfx_index_se(sctx, pm4, se); 5511 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config_se[se]); 5512 } 5513 si_set_grbm_gfx_index(sctx, pm4, ~0); 5514 5515 if (sctx->gfx_level >= GFX7) { 5516 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 5517 } 5518} 5519 5520static void si_set_raster_config(struct si_context *sctx, struct si_pm4_state *pm4) 5521{ 5522 struct si_screen *sscreen = sctx->screen; 5523 unsigned num_rb = MIN2(sscreen->info.max_render_backends, 16); 5524 unsigned rb_mask = sscreen->info.enabled_rb_mask; 5525 unsigned raster_config = sscreen->pa_sc_raster_config; 5526 unsigned raster_config_1 = sscreen->pa_sc_raster_config_1; 5527 5528 if (!rb_mask || util_bitcount(rb_mask) >= num_rb) { 5529 /* Always use the default config when all backends are enabled 5530 * (or when we failed to determine the enabled backends). 5531 */ 5532 si_pm4_set_reg(pm4, R_028350_PA_SC_RASTER_CONFIG, raster_config); 5533 if (sctx->gfx_level >= GFX7) 5534 si_pm4_set_reg(pm4, R_028354_PA_SC_RASTER_CONFIG_1, raster_config_1); 5535 } else { 5536 si_write_harvested_raster_configs(sctx, pm4, raster_config, raster_config_1); 5537 } 5538} 5539 5540unsigned gfx103_get_cu_mask_ps(struct si_screen *sscreen) 5541{ 5542 /* It's wasteful to enable all CUs for PS if shader arrays have a different 5543 * number of CUs. 
The reason is that the hardware sends the same number of PS 5544 * waves to each shader array, so the slowest shader array limits the performance. 5545 * Disable the extra CUs for PS in other shader arrays to save power and thus 5546 * increase clocks for busy CUs. In the future, we might disable or enable this 5547 * tweak only for certain apps. 5548 */ 5549 return u_bit_consecutive(0, sscreen->info.min_good_cu_per_sa); 5550} 5551 5552void si_init_cs_preamble_state(struct si_context *sctx, bool uses_reg_shadowing) 5553{ 5554 struct si_screen *sscreen = sctx->screen; 5555 uint64_t border_color_va = sctx->border_color_buffer->gpu_address; 5556 bool has_clear_state = sscreen->info.has_clear_state; 5557 5558 struct si_cs_preamble { 5559 struct si_pm4_state pm4; 5560 uint32_t more_pm4[150]; /* Add more space because the preamble is large. */ 5561 }; 5562 struct si_pm4_state *pm4 = (struct si_pm4_state *)CALLOC_STRUCT(si_cs_preamble); 5563 5564 if (!pm4) 5565 return; 5566 5567 /* Add all the space that we allocated. */ 5568 pm4->max_dw = sizeof(struct si_cs_preamble) - offsetof(struct si_cs_preamble, pm4.pm4); 5569 5570 if (!uses_reg_shadowing) { 5571 si_pm4_cmd_add(pm4, PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 5572 si_pm4_cmd_add(pm4, CC0_UPDATE_LOAD_ENABLES(1)); 5573 si_pm4_cmd_add(pm4, CC1_UPDATE_SHADOW_ENABLES(1)); 5574 5575 if (sscreen->dpbb_allowed) { 5576 si_pm4_cmd_add(pm4, PKT3(PKT3_EVENT_WRITE, 0, 0)); 5577 si_pm4_cmd_add(pm4, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0)); 5578 } 5579 5580 if (has_clear_state) { 5581 si_pm4_cmd_add(pm4, PKT3(PKT3_CLEAR_STATE, 0, 0)); 5582 si_pm4_cmd_add(pm4, 0); 5583 } 5584 } 5585 5586 /* CLEAR_STATE doesn't restore these correctly. 
*/ 5587 si_pm4_set_reg(pm4, R_028240_PA_SC_GENERIC_SCISSOR_TL, S_028240_WINDOW_OFFSET_DISABLE(1)); 5588 si_pm4_set_reg(pm4, R_028244_PA_SC_GENERIC_SCISSOR_BR, 5589 S_028244_BR_X(16384) | S_028244_BR_Y(16384)); 5590 5591 si_pm4_set_reg(pm4, R_028A18_VGT_HOS_MAX_TESS_LEVEL, fui(64)); 5592 if (!has_clear_state) 5593 si_pm4_set_reg(pm4, R_028A1C_VGT_HOS_MIN_TESS_LEVEL, fui(0)); 5594 5595 if (!has_clear_state) { 5596 si_pm4_set_reg(pm4, R_028820_PA_CL_NANINF_CNTL, 0); 5597 si_pm4_set_reg(pm4, R_028AC0_DB_SRESULTS_COMPARE_STATE0, 0x0); 5598 si_pm4_set_reg(pm4, R_028AC4_DB_SRESULTS_COMPARE_STATE1, 0x0); 5599 si_pm4_set_reg(pm4, R_028AC8_DB_PRELOAD_CONTROL, 0x0); 5600 si_pm4_set_reg(pm4, R_02800C_DB_RENDER_OVERRIDE, 0); 5601 si_pm4_set_reg(pm4, R_028A8C_VGT_PRIMITIVEID_RESET, 0x0); 5602 si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0); 5603 5604 if (sctx->gfx_level < GFX11) { 5605 si_pm4_set_reg(pm4, R_028A5C_VGT_GS_PER_VS, 0x2); 5606 si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0); 5607 } 5608 } 5609 5610 si_pm4_set_reg(pm4, R_028080_TA_BC_BASE_ADDR, border_color_va >> 8); 5611 if (sctx->gfx_level >= GFX7) 5612 si_pm4_set_reg(pm4, R_028084_TA_BC_BASE_ADDR_HI, S_028084_ADDRESS(border_color_va >> 40)); 5613 5614 if (sctx->gfx_level == GFX6) { 5615 si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, 5616 S_008A14_NUM_CLIP_SEQ(3) | S_008A14_CLIP_VTX_REORDER_ENA(1)); 5617 } 5618 5619 if (sctx->gfx_level >= GFX7) { 5620 si_pm4_set_reg(pm4, R_030A00_PA_SU_LINE_STIPPLE_VALUE, 0); 5621 si_pm4_set_reg(pm4, R_030A04_PA_SC_LINE_STIPPLE_STATE, 0); 5622 } else { 5623 si_pm4_set_reg(pm4, R_008A60_PA_SU_LINE_STIPPLE_VALUE, 0); 5624 si_pm4_set_reg(pm4, R_008B10_PA_SC_LINE_STIPPLE_STATE, 0); 5625 } 5626 5627 if (sctx->gfx_level <= GFX7 || !has_clear_state) { 5628 if (sctx->gfx_level < GFX11) { 5629 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 5630 si_pm4_set_reg(pm4, R_028C5C_VGT_OUT_DEALLOC_CNTL, 16); 5631 } 5632 5633 /* CLEAR_STATE doesn't clear these correctly on 
certain generations. 5634 * I don't know why. Deduced by trial and error. 5635 */ 5636 si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0); 5637 si_pm4_set_reg(pm4, R_028204_PA_SC_WINDOW_SCISSOR_TL, S_028204_WINDOW_OFFSET_DISABLE(1)); 5638 si_pm4_set_reg(pm4, R_028030_PA_SC_SCREEN_SCISSOR_TL, 0); 5639 si_pm4_set_reg(pm4, R_028034_PA_SC_SCREEN_SCISSOR_BR, 5640 S_028034_BR_X(16384) | S_028034_BR_Y(16384)); 5641 } 5642 5643 if (sctx->gfx_level >= GFX10 && sctx->gfx_level < GFX11) { 5644 si_pm4_set_reg(pm4, R_028038_DB_DFSM_CONTROL, 5645 S_028038_PUNCHOUT_MODE(V_028038_FORCE_OFF) | 5646 S_028038_POPS_DRAIN_PS_ON_OVERLAP(1)); 5647 } 5648 5649 unsigned cu_mask_ps = 0xffffffff; 5650 5651 if (sctx->gfx_level >= GFX10_3) 5652 cu_mask_ps = gfx103_get_cu_mask_ps(sscreen); 5653 5654 if (sctx->gfx_level >= GFX7) { 5655 ac_set_reg_cu_en(pm4, R_00B01C_SPI_SHADER_PGM_RSRC3_PS, 5656 S_00B01C_CU_EN(cu_mask_ps) | 5657 S_00B01C_WAVE_LIMIT(0x3F) | 5658 S_00B01C_LDS_GROUP_SIZE(sctx->gfx_level >= GFX11), 5659 C_00B01C_CU_EN, 0, &sscreen->info, 5660 (void*)(sctx->gfx_level >= GFX10 ? si_pm4_set_reg_idx3 : si_pm4_set_reg)); 5661 } 5662 5663 if (sctx->gfx_level <= GFX8) { 5664 si_set_raster_config(sctx, pm4); 5665 5666 /* FIXME calculate these values somehow ??? */ 5667 si_pm4_set_reg(pm4, R_028A54_VGT_GS_PER_ES, SI_GS_PER_ES); 5668 si_pm4_set_reg(pm4, R_028A58_VGT_ES_PER_GS, 0x40); 5669 5670 /* These registers, when written, also overwrite the CLEAR_STATE 5671 * context, so we can't rely on CLEAR_STATE setting them. 5672 * It would be an issue if there was another UMD changing them. 
5673 */ 5674 si_pm4_set_reg(pm4, R_028400_VGT_MAX_VTX_INDX, ~0); 5675 si_pm4_set_reg(pm4, R_028404_VGT_MIN_VTX_INDX, 0); 5676 si_pm4_set_reg(pm4, R_028408_VGT_INDX_OFFSET, 0); 5677 } 5678 5679 if (sscreen->info.gfx_level >= GFX10) { 5680 si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, 5681 S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); 5682 si_pm4_set_reg(pm4, R_00B324_SPI_SHADER_PGM_HI_ES, 5683 S_00B324_MEM_BASE(sscreen->info.address32_hi >> 8)); 5684 } else if (sscreen->info.gfx_level == GFX9) { 5685 si_pm4_set_reg(pm4, R_00B414_SPI_SHADER_PGM_HI_LS, 5686 S_00B414_MEM_BASE(sscreen->info.address32_hi >> 8)); 5687 si_pm4_set_reg(pm4, R_00B214_SPI_SHADER_PGM_HI_ES, 5688 S_00B214_MEM_BASE(sscreen->info.address32_hi >> 8)); 5689 } else { 5690 si_pm4_set_reg(pm4, R_00B524_SPI_SHADER_PGM_HI_LS, 5691 S_00B524_MEM_BASE(sscreen->info.address32_hi >> 8)); 5692 } 5693 5694 if (sctx->gfx_level >= GFX7 && sctx->gfx_level <= GFX8) { 5695 ac_set_reg_cu_en(pm4, R_00B51C_SPI_SHADER_PGM_RSRC3_LS, 5696 S_00B51C_CU_EN(0xffff) | S_00B51C_WAVE_LIMIT(0x3F), 5697 C_00B51C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg); 5698 si_pm4_set_reg(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, S_00B41C_WAVE_LIMIT(0x3F)); 5699 ac_set_reg_cu_en(pm4, R_00B31C_SPI_SHADER_PGM_RSRC3_ES, 5700 S_00B31C_CU_EN(0xffff) | S_00B31C_WAVE_LIMIT(0x3F), 5701 C_00B31C_CU_EN, 0, &sscreen->info, (void*)si_pm4_set_reg); 5702 5703 /* If this is 0, Bonaire can hang even if GS isn't being used. 5704 * Other chips are unaffected. These are suboptimal values, 5705 * but we don't use on-chip GS. 5706 */ 5707 si_pm4_set_reg(pm4, R_028A44_VGT_GS_ONCHIP_CNTL, 5708 S_028A44_ES_VERTS_PER_SUBGRP(64) | S_028A44_GS_PRIMS_PER_SUBGRP(4)); 5709 } 5710 5711 if (sctx->gfx_level >= GFX8) { 5712 unsigned vgt_tess_distribution; 5713 5714 if (sctx->gfx_level >= GFX11) { 5715 /* ACCUM fields changed their meaning. 
*/ 5716 vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(255) | 5717 S_028B50_ACCUM_TRI(255) | 5718 S_028B50_ACCUM_QUAD(255) | 5719 S_028B50_DONUT_SPLIT_GFX9(24) | 5720 S_028B50_TRAP_SPLIT(6); 5721 } else if (sctx->gfx_level >= GFX9) { 5722 vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(12) | 5723 S_028B50_ACCUM_TRI(30) | 5724 S_028B50_ACCUM_QUAD(24) | 5725 S_028B50_DONUT_SPLIT_GFX9(24) | 5726 S_028B50_TRAP_SPLIT(6); 5727 } else if (sctx->gfx_level == GFX8) { 5728 vgt_tess_distribution = S_028B50_ACCUM_ISOLINE(32) | 5729 S_028B50_ACCUM_TRI(11) | 5730 S_028B50_ACCUM_QUAD(11) | 5731 S_028B50_DONUT_SPLIT_GFX81(16); 5732 5733 /* Testing with Unigine Heaven extreme tesselation yielded best results 5734 * with TRAP_SPLIT = 3. 5735 */ 5736 if (sctx->family == CHIP_FIJI || sctx->family >= CHIP_POLARIS10) 5737 vgt_tess_distribution |= S_028B50_TRAP_SPLIT(3); 5738 } 5739 5740 si_pm4_set_reg(pm4, R_028B50_VGT_TESS_DISTRIBUTION, vgt_tess_distribution); 5741 } 5742 5743 if (sscreen->info.gfx_level <= GFX9) { 5744 si_pm4_set_reg(pm4, R_028AA0_VGT_INSTANCE_STEP_RATE_0, 1); 5745 } 5746 5747 if (sctx->gfx_level == GFX9) { 5748 si_pm4_set_reg(pm4, R_030920_VGT_MAX_VTX_INDX, ~0); 5749 si_pm4_set_reg(pm4, R_030924_VGT_MIN_VTX_INDX, 0); 5750 si_pm4_set_reg(pm4, R_030928_VGT_INDX_OFFSET, 0); 5751 5752 si_pm4_set_reg(pm4, R_028060_DB_DFSM_CONTROL, 5753 S_028060_PUNCHOUT_MODE(V_028060_FORCE_OFF) | 5754 S_028060_POPS_DRAIN_PS_ON_OVERLAP(1)); 5755 } 5756 5757 if (sctx->gfx_level >= GFX9) { 5758 ac_set_reg_cu_en(pm4, R_00B41C_SPI_SHADER_PGM_RSRC3_HS, 5759 S_00B41C_CU_EN(0xffff) | S_00B41C_WAVE_LIMIT(0x3F), C_00B41C_CU_EN, 5760 0, &sscreen->info, 5761 (void*)(sctx->gfx_level >= GFX10 ? 
si_pm4_set_reg_idx3 : si_pm4_set_reg)); 5762 5763 si_pm4_set_reg(pm4, R_028C48_PA_SC_BINNER_CNTL_1, 5764 S_028C48_MAX_ALLOC_COUNT(sscreen->info.pbb_max_alloc_count - 1) | 5765 S_028C48_MAX_PRIM_PER_BATCH(1023)); 5766 si_pm4_set_reg(pm4, R_028C4C_PA_SC_CONSERVATIVE_RASTERIZATION_CNTL, 5767 S_028C4C_NULL_SQUAD_AA_MASK_ENABLE(1)); 5768 5769 si_pm4_set_reg(pm4, R_030968_VGT_INSTANCE_BASE_ID, 0); 5770 5771 if (sctx->gfx_level < GFX11) { 5772 si_pm4_set_reg(pm4, R_0301EC_CP_COHER_START_DELAY, 5773 sctx->gfx_level >= GFX10 ? 0x20 : 0); 5774 } 5775 } 5776 5777 if (sctx->gfx_level >= GFX10) { 5778 si_pm4_set_reg(pm4, R_00B0C8_SPI_SHADER_USER_ACCUM_PS_0, 0); 5779 si_pm4_set_reg(pm4, R_00B0CC_SPI_SHADER_USER_ACCUM_PS_1, 0); 5780 si_pm4_set_reg(pm4, R_00B0D0_SPI_SHADER_USER_ACCUM_PS_2, 0); 5781 si_pm4_set_reg(pm4, R_00B0D4_SPI_SHADER_USER_ACCUM_PS_3, 0); 5782 si_pm4_set_reg(pm4, R_00B2C8_SPI_SHADER_USER_ACCUM_ESGS_0, 0); 5783 si_pm4_set_reg(pm4, R_00B2CC_SPI_SHADER_USER_ACCUM_ESGS_1, 0); 5784 si_pm4_set_reg(pm4, R_00B2D0_SPI_SHADER_USER_ACCUM_ESGS_2, 0); 5785 si_pm4_set_reg(pm4, R_00B2D4_SPI_SHADER_USER_ACCUM_ESGS_3, 0); 5786 si_pm4_set_reg(pm4, R_00B4C8_SPI_SHADER_USER_ACCUM_LSHS_0, 0); 5787 si_pm4_set_reg(pm4, R_00B4CC_SPI_SHADER_USER_ACCUM_LSHS_1, 0); 5788 si_pm4_set_reg(pm4, R_00B4D0_SPI_SHADER_USER_ACCUM_LSHS_2, 0); 5789 si_pm4_set_reg(pm4, R_00B4D4_SPI_SHADER_USER_ACCUM_LSHS_3, 0); 5790 5791 si_pm4_set_reg(pm4, R_00B0C0_SPI_SHADER_REQ_CTRL_PS, 5792 S_00B0C0_SOFT_GROUPING_EN(1) | 5793 S_00B0C0_NUMBER_OF_REQUESTS_PER_CU(4 - 1)); 5794 5795 /* Enable CMASK/HTILE/DCC caching in L2 for small chips. */ 5796 unsigned meta_write_policy, meta_read_policy; 5797 unsigned no_alloc = sctx->gfx_level >= GFX11 ? 
V_02807C_CACHE_NOA_GFX11: 5798 V_02807C_CACHE_NOA_GFX10; 5799 if (sscreen->info.max_render_backends <= 4) { 5800 meta_write_policy = V_02807C_CACHE_LRU_WR; /* cache writes */ 5801 meta_read_policy = V_02807C_CACHE_LRU_RD; /* cache reads */ 5802 } else { 5803 meta_write_policy = V_02807C_CACHE_STREAM; /* write combine */ 5804 meta_read_policy = no_alloc; /* don't cache reads that miss */ 5805 } 5806 5807 si_pm4_set_reg(pm4, R_02807C_DB_RMI_L2_CACHE_CONTROL, 5808 S_02807C_Z_WR_POLICY(V_02807C_CACHE_STREAM) | 5809 S_02807C_S_WR_POLICY(V_02807C_CACHE_STREAM) | 5810 S_02807C_HTILE_WR_POLICY(meta_write_policy) | 5811 S_02807C_ZPCPSD_WR_POLICY(V_02807C_CACHE_STREAM) | 5812 S_02807C_Z_RD_POLICY(no_alloc) | 5813 S_02807C_S_RD_POLICY(no_alloc) | 5814 S_02807C_HTILE_RD_POLICY(meta_read_policy)); 5815 5816 unsigned gl2_cc; 5817 if (sctx->gfx_level >= GFX11) 5818 gl2_cc = S_028410_DCC_WR_POLICY_GFX11(meta_write_policy) | 5819 S_028410_COLOR_WR_POLICY_GFX11(V_028410_CACHE_STREAM) | 5820 S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX11); 5821 else 5822 gl2_cc = S_028410_CMASK_WR_POLICY(meta_write_policy) | 5823 S_028410_FMASK_WR_POLICY(V_028410_CACHE_STREAM) | 5824 S_028410_DCC_WR_POLICY_GFX10(meta_write_policy) | 5825 S_028410_COLOR_WR_POLICY_GFX10(V_028410_CACHE_STREAM) | 5826 S_028410_CMASK_RD_POLICY(meta_read_policy) | 5827 S_028410_FMASK_RD_POLICY(V_028410_CACHE_NOA_GFX10) | 5828 S_028410_COLOR_RD_POLICY(V_028410_CACHE_NOA_GFX10); 5829 5830 si_pm4_set_reg(pm4, R_028410_CB_RMI_GL2_CACHE_CONTROL, 5831 gl2_cc | 5832 S_028410_DCC_RD_POLICY(meta_read_policy)); 5833 5834 si_pm4_set_reg(pm4, R_028428_CB_COVERAGE_OUT_CONTROL, 0); 5835 si_pm4_set_reg(pm4, R_028A98_VGT_DRAW_PAYLOAD_CNTL, 0); 5836 5837 /* Break up a pixel wave if it contains deallocs for more than 5838 * half the parameter cache. 
5839 * 5840 * To avoid a deadlock where pixel waves aren't launched 5841 * because they're waiting for more pixels while the frontend 5842 * is stuck waiting for PC space, the maximum allowed value is 5843 * the size of the PC minus the largest possible allocation for 5844 * a single primitive shader subgroup. 5845 */ 5846 si_pm4_set_reg(pm4, R_028C50_PA_SC_NGG_MODE_CNTL, 5847 S_028C50_MAX_DEALLOCS_IN_WAVE(sctx->gfx_level >= GFX11 ? 16 : 512)); 5848 5849 if (sctx->gfx_level < GFX11) { 5850 /* Reuse for legacy (non-NGG) only. */ 5851 si_pm4_set_reg(pm4, R_028C58_VGT_VERTEX_REUSE_BLOCK_CNTL, 14); 5852 } 5853 5854 if (!has_clear_state) { 5855 si_pm4_set_reg(pm4, R_02835C_PA_SC_TILE_STEERING_OVERRIDE, 5856 sscreen->info.pa_sc_tile_steering_override); 5857 } 5858 5859 5860 si_pm4_set_reg(pm4, R_030964_GE_MAX_VTX_INDX, ~0); 5861 si_pm4_set_reg(pm4, R_030924_GE_MIN_VTX_INDX, 0); 5862 si_pm4_set_reg(pm4, R_030928_GE_INDX_OFFSET, 0); 5863 si_pm4_set_reg(pm4, R_03097C_GE_STEREO_CNTL, 0); 5864 si_pm4_set_reg(pm4, R_030988_GE_USER_VGPR_EN, 0); 5865 } 5866 5867 if (sctx->gfx_level >= GFX10 && sctx->gfx_level <= GFX10_3) { 5868 /* Logical CUs 16 - 31 */ 5869 ac_set_reg_cu_en(pm4, R_00B004_SPI_SHADER_PGM_RSRC4_PS, S_00B004_CU_EN(cu_mask_ps >> 16), 5870 C_00B004_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3); 5871 ac_set_reg_cu_en(pm4, R_00B104_SPI_SHADER_PGM_RSRC4_VS, S_00B104_CU_EN(0xffff), 5872 C_00B104_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3); 5873 ac_set_reg_cu_en(pm4, R_00B404_SPI_SHADER_PGM_RSRC4_HS, S_00B404_CU_EN(0xffff), 5874 C_00B404_CU_EN, 16, &sscreen->info, (void*)si_pm4_set_reg_idx3); 5875 5876 si_pm4_set_reg(pm4, R_00B1C0_SPI_SHADER_REQ_CTRL_VS, 0); 5877 si_pm4_set_reg(pm4, R_00B1C8_SPI_SHADER_USER_ACCUM_VS_0, 0); 5878 si_pm4_set_reg(pm4, R_00B1CC_SPI_SHADER_USER_ACCUM_VS_1, 0); 5879 si_pm4_set_reg(pm4, R_00B1D0_SPI_SHADER_USER_ACCUM_VS_2, 0); 5880 si_pm4_set_reg(pm4, R_00B1D4_SPI_SHADER_USER_ACCUM_VS_3, 0); 5881 } 5882 5883 if 
(sctx->gfx_level >= GFX10_3) { 5884 si_pm4_set_reg(pm4, R_028750_SX_PS_DOWNCONVERT_CONTROL, 0xff); 5885 /* The rate combiners have no effect if they are disabled like this: 5886 * VERTEX_RATE: BYPASS_VTX_RATE_COMBINER = 1 5887 * PRIMITIVE_RATE: BYPASS_PRIM_RATE_COMBINER = 1 5888 * HTILE_RATE: VRS_HTILE_ENCODING = 0 5889 * SAMPLE_ITER: PS_ITER_SAMPLE = 0 5890 * 5891 * Use OVERRIDE, which will ignore results from previous combiners. 5892 * (e.g. enabled sample shading overrides the vertex rate) 5893 */ 5894 si_pm4_set_reg(pm4, R_028848_PA_CL_VRS_CNTL, 5895 S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE) | 5896 S_028848_SAMPLE_ITER_COMBINER_MODE(V_028848_VRS_COMB_MODE_OVERRIDE)); 5897 } 5898 5899 if (sctx->gfx_level >= GFX11) { 5900 si_pm4_set_reg(pm4, R_028C54_PA_SC_BINNER_CNTL_2, 0); 5901 si_pm4_set_reg(pm4, R_028620_PA_RATE_CNTL, 5902 S_028620_VERTEX_RATE(2) | S_028620_PRIM_RATE(1)); 5903 5904 /* We must wait for idle using an EOP event before changing the attribute ring registers. 5905 * Use the bottom-of-pipe EOP event, but increment the PWS counter instead of writing memory. 5906 */ 5907 si_pm4_cmd_add(pm4, PKT3(PKT3_RELEASE_MEM, 6, 0)); 5908 si_pm4_cmd_add(pm4, S_490_EVENT_TYPE(V_028A90_BOTTOM_OF_PIPE_TS) | 5909 S_490_EVENT_INDEX(5) | 5910 S_490_PWS_ENABLE(1)); 5911 si_pm4_cmd_add(pm4, 0); /* DST_SEL, INT_SEL, DATA_SEL */ 5912 si_pm4_cmd_add(pm4, 0); /* ADDRESS_LO */ 5913 si_pm4_cmd_add(pm4, 0); /* ADDRESS_HI */ 5914 si_pm4_cmd_add(pm4, 0); /* DATA_LO */ 5915 si_pm4_cmd_add(pm4, 0); /* DATA_HI */ 5916 si_pm4_cmd_add(pm4, 0); /* INT_CTXID */ 5917 5918 /* Wait for the PWS counter. 
*/ 5919 si_pm4_cmd_add(pm4, PKT3(PKT3_ACQUIRE_MEM, 6, 0)); 5920 si_pm4_cmd_add(pm4, S_580_PWS_STAGE_SEL(V_580_CP_ME) | 5921 S_580_PWS_COUNTER_SEL(V_580_TS_SELECT) | 5922 S_580_PWS_ENA2(1) | 5923 S_580_PWS_COUNT(0)); 5924 si_pm4_cmd_add(pm4, 0xffffffff); /* GCR_SIZE */ 5925 si_pm4_cmd_add(pm4, 0x01ffffff); /* GCR_SIZE_HI */ 5926 si_pm4_cmd_add(pm4, 0); /* GCR_BASE_LO */ 5927 si_pm4_cmd_add(pm4, 0); /* GCR_BASE_HI */ 5928 si_pm4_cmd_add(pm4, S_585_PWS_ENA(1)); 5929 si_pm4_cmd_add(pm4, 0); /* GCR_CNTL */ 5930 5931 si_pm4_set_reg(pm4, R_031110_SPI_GS_THROTTLE_CNTL1, 0x12355123); 5932 si_pm4_set_reg(pm4, R_031114_SPI_GS_THROTTLE_CNTL2, 0x1544D); 5933 5934 assert((sscreen->attribute_ring->gpu_address >> 32) == sscreen->info.address32_hi); 5935 5936 /* The PS will read inputs from this address. */ 5937 si_pm4_set_reg(pm4, R_031118_SPI_ATTRIBUTE_RING_BASE, 5938 sscreen->attribute_ring->gpu_address >> 16); 5939 si_pm4_set_reg(pm4, R_03111C_SPI_ATTRIBUTE_RING_SIZE, 5940 S_03111C_MEM_SIZE(((sscreen->attribute_ring->bo_size / 5941 sscreen->info.max_se) >> 16) - 1) | 5942 S_03111C_BIG_PAGE(sscreen->info.discardable_allows_big_page) | 5943 S_03111C_L1_POLICY(1)); 5944 } 5945 5946 sctx->cs_preamble_state = pm4; 5947 5948 /* Make a copy of the preamble for TMZ. */ 5949 sctx->cs_preamble_state_tmz = (struct si_pm4_state *)CALLOC_STRUCT(si_cs_preamble); 5950 memcpy(sctx->cs_preamble_state_tmz, sctx->cs_preamble_state, sizeof(struct si_cs_preamble)); 5951} 5952