1/* 2 * Copyright 2017 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25#include "si_pipe.h" 26#include "sid.h" 27#include "util/format/u_format.h" 28#include "util/u_pack_color.h" 29#include "util/u_surface.h" 30 31enum 32{ 33 SI_CLEAR = SI_SAVE_FRAGMENT_STATE, 34 SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE, 35}; 36 37void si_init_buffer_clear(struct si_clear_info *info, 38 struct pipe_resource *resource, uint64_t offset, 39 uint32_t size, uint32_t clear_value) 40{ 41 info->resource = resource; 42 info->offset = offset; 43 info->size = size; 44 info->clear_value = clear_value; 45 info->writemask = 0xffffffff; 46 info->is_dcc_msaa = false; 47} 48 49static void si_init_buffer_clear_rmw(struct si_clear_info *info, 50 struct pipe_resource *resource, uint64_t offset, 51 uint32_t size, uint32_t clear_value, uint32_t writemask) 52{ 53 si_init_buffer_clear(info, resource, offset, size, clear_value); 54 info->writemask = writemask; 55} 56 57void si_execute_clears(struct si_context *sctx, struct si_clear_info *info, 58 unsigned num_clears, unsigned types) 59{ 60 if (!num_clears) 61 return; 62 63 /* Flush caches and wait for idle. */ 64 if (types & (SI_CLEAR_TYPE_CMASK | SI_CLEAR_TYPE_DCC)) 65 sctx->flags |= si_get_flush_flags(sctx, SI_COHERENCY_CB_META, L2_LRU); 66 67 if (types & SI_CLEAR_TYPE_HTILE) 68 sctx->flags |= si_get_flush_flags(sctx, SI_COHERENCY_DB_META, L2_LRU); 69 70 /* Flush caches in case we use compute. */ 71 sctx->flags |= SI_CONTEXT_INV_VCACHE; 72 73 /* GFX6-8: CB and DB don't use L2. */ 74 if (sctx->gfx_level <= GFX8) 75 sctx->flags |= SI_CONTEXT_INV_L2; 76 77 /* Execute clears. */ 78 for (unsigned i = 0; i < num_clears; i++) { 79 if (info[i].is_dcc_msaa) { 80 gfx9_clear_dcc_msaa(sctx, info[i].resource, info[i].clear_value, 81 SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP); 82 continue; 83 } 84 85 assert(info[i].size > 0); 86 87 if (info[i].writemask != 0xffffffff) { 88 si_compute_clear_buffer_rmw(sctx, info[i].resource, info[i].offset, info[i].size, 89 info[i].clear_value, info[i].writemask, 90 SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP); 91 } else { 92 /* Compute shaders are much faster on both dGPUs and APUs. Don't use CP DMA. */ 93 si_clear_buffer(sctx, info[i].resource, info[i].offset, info[i].size, 94 &info[i].clear_value, 4, SI_OP_SKIP_CACHE_INV_BEFORE, 95 SI_COHERENCY_CP, SI_COMPUTE_CLEAR_METHOD); 96 } 97 } 98 99 /* Wait for idle. */ 100 sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH; 101 102 /* GFX6-8: CB and DB don't use L2. */ 103 if (sctx->gfx_level <= GFX8) 104 sctx->flags |= SI_CONTEXT_WB_L2; 105} 106 107static bool si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex) 108{ 109 assert(sscreen->info.gfx_level < GFX11); 110 111 /* CMASK for MSAA is allocated in advance or always disabled 112 * by "nofmask" option. 113 */ 114 if (tex->cmask_buffer) 115 return true; 116 117 if (!tex->surface.cmask_size) 118 return false; 119 120 tex->cmask_buffer = 121 si_aligned_buffer_create(&sscreen->b, PIPE_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT, 122 tex->surface.cmask_size, 1 << tex->surface.cmask_alignment_log2); 123 if (tex->cmask_buffer == NULL) 124 return false; 125 126 tex->cmask_base_address_reg = tex->cmask_buffer->gpu_address >> 8; 127 tex->cb_color_info |= S_028C70_FAST_CLEAR(1); 128 129 p_atomic_inc(&sscreen->compressed_colortex_counter); 130 return true; 131} 132 133static bool si_set_clear_color(struct si_texture *tex, enum pipe_format surface_format, 134 const union pipe_color_union *color) 135{ 136 union util_color uc; 137 138 memset(&uc, 0, sizeof(uc)); 139 140 if (tex->surface.bpe == 16) { 141 /* DCC fast clear only: 142 * CLEAR_WORD0 = R = G = B 143 * CLEAR_WORD1 = A 144 */ 145 assert(color->ui[0] == color->ui[1] && color->ui[0] == color->ui[2]); 146 uc.ui[0] = color->ui[0]; 147 uc.ui[1] = color->ui[3]; 148 } else { 149 if (tex->swap_rgb_to_bgr) 150 surface_format = util_format_rgb_to_bgr(surface_format); 151 152 util_pack_color_union(surface_format, &uc, color); 153 } 154 155 if (memcmp(tex->color_clear_value, &uc, 2 * sizeof(uint32_t)) == 0) 156 return false; 157 158 memcpy(tex->color_clear_value, &uc, 2 * sizeof(uint32_t)); 159 return true; 160} 161 162/** Linearize and convert luminance/intensity to red. */ 163enum pipe_format si_simplify_cb_format(enum pipe_format format) 164{ 165 format = util_format_linear(format); 166 format = util_format_luminance_to_red(format); 167 return util_format_intensity_to_red(format); 168} 169 170bool vi_alpha_is_on_msb(struct si_screen *sscreen, enum pipe_format format) 171{ 172 format = si_simplify_cb_format(format); 173 const struct util_format_description *desc = util_format_description(format); 174 unsigned comp_swap = si_translate_colorswap(sscreen->info.gfx_level, format, false); 175 176 /* The following code matches the hw behavior. */ 177 if (desc->nr_channels == 1) { 178 return (comp_swap == V_028C70_SWAP_ALT_REV) != (sscreen->info.family == CHIP_RAVEN2 || 179 sscreen->info.family == CHIP_RENOIR); 180 } 181 182 return comp_swap != V_028C70_SWAP_STD_REV && comp_swap != V_028C70_SWAP_ALT_REV; 183} 184 185static bool gfx8_get_dcc_clear_parameters(struct si_screen *sscreen, enum pipe_format base_format, 186 enum pipe_format surface_format, 187 const union pipe_color_union *color, uint32_t *clear_value, 188 bool *eliminate_needed) 189{ 190 /* If we want to clear without needing a fast clear eliminate step, we 191 * can set color and alpha independently to 0 or 1 (or 0/max for integer 192 * formats). 193 */ 194 bool values[4] = {}; /* whether to clear to 0 or 1 */ 195 bool color_value = false; /* clear color to 0 or 1 */ 196 bool alpha_value = false; /* clear alpha to 0 or 1 */ 197 int alpha_channel; /* index of the alpha component */ 198 bool has_color = false; 199 bool has_alpha = false; 200 201 const struct util_format_description *desc = 202 util_format_description(si_simplify_cb_format(surface_format)); 203 204 /* 128-bit fast clear with different R,G,B values is unsupported. */ 205 if (desc->block.bits == 128 && (color->ui[0] != color->ui[1] || color->ui[0] != color->ui[2])) 206 return false; 207 208 *eliminate_needed = true; 209 *clear_value = GFX8_DCC_CLEAR_REG; 210 211 if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN) 212 return true; /* need ELIMINATE_FAST_CLEAR */ 213 214 bool base_alpha_is_on_msb = vi_alpha_is_on_msb(sscreen, base_format); 215 bool surf_alpha_is_on_msb = vi_alpha_is_on_msb(sscreen, surface_format); 216 217 /* Formats with 3 channels can't have alpha. */ 218 if (desc->nr_channels == 3) 219 alpha_channel = -1; 220 else if (surf_alpha_is_on_msb) 221 alpha_channel = desc->nr_channels - 1; 222 else 223 alpha_channel = 0; 224 225 for (int i = 0; i < 4; ++i) { 226 if (desc->swizzle[i] >= PIPE_SWIZZLE_0) 227 continue; 228 229 if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) { 230 /* Use the maximum value for clamping the clear color. */ 231 int max = u_bit_consecutive(0, desc->channel[i].size - 1); 232 233 values[i] = color->i[i] != 0; 234 if (color->i[i] != 0 && MIN2(color->i[i], max) != max) 235 return true; /* need ELIMINATE_FAST_CLEAR */ 236 } else if (desc->channel[i].pure_integer && 237 desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) { 238 /* Use the maximum value for clamping the clear color. */ 239 unsigned max = u_bit_consecutive(0, desc->channel[i].size); 240 241 values[i] = color->ui[i] != 0U; 242 if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max) 243 return true; /* need ELIMINATE_FAST_CLEAR */ 244 } else { 245 values[i] = color->f[i] != 0.0F; 246 if (color->f[i] != 0.0F && color->f[i] != 1.0F) 247 return true; /* need ELIMINATE_FAST_CLEAR */ 248 } 249 250 if (desc->swizzle[i] == alpha_channel) { 251 alpha_value = values[i]; 252 has_alpha = true; 253 } else { 254 color_value = values[i]; 255 has_color = true; 256 } 257 } 258 259 /* If alpha isn't present, make it the same as color, and vice versa. */ 260 if (!has_alpha) 261 alpha_value = color_value; 262 else if (!has_color) 263 color_value = alpha_value; 264 265 if (color_value != alpha_value && base_alpha_is_on_msb != surf_alpha_is_on_msb) 266 return true; /* require ELIMINATE_FAST_CLEAR */ 267 268 /* Check if all color values are equal if they are present. */ 269 for (int i = 0; i < 4; ++i) { 270 if (desc->swizzle[i] <= PIPE_SWIZZLE_W && desc->swizzle[i] != alpha_channel && 271 values[i] != color_value) 272 return true; /* require ELIMINATE_FAST_CLEAR */ 273 } 274 275 /* This doesn't need ELIMINATE_FAST_CLEAR. 276 * On chips predating Raven2, the DCC clear codes and the CB clear 277 * color registers must match. 278 */ 279 *eliminate_needed = false; 280 281 if (color_value) { 282 if (alpha_value) 283 *clear_value = GFX8_DCC_CLEAR_1111; 284 else 285 *clear_value = GFX8_DCC_CLEAR_1110; 286 } else { 287 if (alpha_value) 288 *clear_value = GFX8_DCC_CLEAR_0001; 289 else 290 *clear_value = GFX8_DCC_CLEAR_0000; 291 } 292 return true; 293} 294 295static bool gfx11_get_dcc_clear_parameters(struct si_screen *sscreen, enum pipe_format surface_format, 296 const union pipe_color_union *color, uint32_t *clear_value) 297{ 298 const struct util_format_description *desc = 299 util_format_description(si_simplify_cb_format(surface_format)); 300 unsigned start_bit = UINT_MAX; 301 unsigned end_bit = 0; 302 303 /* TODO: 8bpp and 16bpp fast DCC clears don't work. */ 304 if (desc->block.bits <= 16) 305 return false; 306 307 /* Find the used bit range. */ 308 for (unsigned i = 0; i < 4; i++) { 309 unsigned swizzle = desc->swizzle[i]; 310 311 if (swizzle >= PIPE_SWIZZLE_0) 312 continue; 313 314 start_bit = MIN2(start_bit, desc->channel[swizzle].shift); 315 end_bit = MAX2(end_bit, desc->channel[swizzle].shift + desc->channel[swizzle].size); 316 } 317 318 union { 319 uint8_t ub[16]; 320 uint16_t us[8]; 321 uint32_t ui[4]; 322 } value = {}; 323 util_pack_color_union(surface_format, (union util_color*)&value, color); 324 325 /* Check the cases where all components or bits are either all 0 or all 1. */ 326 bool all_bits_are_0 = true; 327 bool all_bits_are_1 = true; 328 bool all_words_are_fp16_1 = false; 329 bool all_words_are_fp32_1 = false; 330 331 for (unsigned i = start_bit; i < end_bit; i++) { 332 bool bit = value.ub[i / 8] & BITFIELD_BIT(i % 8); 333 334 all_bits_are_0 &= !bit; 335 all_bits_are_1 &= bit; 336 } 337 338 if (start_bit % 16 == 0 && end_bit % 16 == 0) { 339 all_words_are_fp16_1 = true; 340 for (unsigned i = start_bit / 16; i < end_bit / 16; i++) 341 all_words_are_fp16_1 &= value.us[i] == 0x3c00; 342 } 343 344 if (start_bit % 32 == 0 && end_bit % 32 == 0) { 345 all_words_are_fp32_1 = true; 346 for (unsigned i = start_bit / 32; i < end_bit / 32; i++) 347 all_words_are_fp32_1 &= value.ui[i] == 0x3f800000; 348 } 349 350#if 0 /* debug code */ 351 int i = util_format_get_first_non_void_channel(surface_format); 352 if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[i].pure_integer) { 353 printf("%i %i %i %i\n", color->i[0], color->i[1], color->i[2], color->i[3]); 354 } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED && desc->channel[i].pure_integer) { 355 printf("%u %u %u %u\n", color->ui[0], color->ui[1], color->ui[2], color->ui[3]); 356 } else { 357 printf("%f %f %f %f\n", color->f[0], color->f[1], color->f[2], color->f[3]); 358 } 359 for (unsigned i = 0; i < end_bit / 8; i++) 360 printf("%02x", value.ub[i]); 361 printf("\n"); 362 printf("bits=[%u..%u)%s%s%s%s\n", start_bit, end_bit, 363 all_bits_are_0 ? ", all 0" : "", 364 all_bits_are_1 ? ", all 1" : "", 365 all_words_are_fp16_1 ? ", all fp16 1" : "", 366 all_words_are_fp32_1 ? ", all fp32 1" : ""); 367#endif 368 369 *clear_value = 0; 370 371 if (all_bits_are_0 || all_bits_are_1 || all_words_are_fp16_1 || all_words_are_fp32_1) { 372 if (all_bits_are_0) 373 *clear_value = GFX11_DCC_CLEAR_0000; 374 else if (all_bits_are_1) 375 *clear_value = GFX11_DCC_CLEAR_1111_UNORM; 376 else if (all_words_are_fp16_1) 377 *clear_value = GFX11_DCC_CLEAR_1111_FP16; 378 else if (all_words_are_fp32_1) 379 *clear_value = GFX11_DCC_CLEAR_1111_FP32; 380 381 return true; 382 } 383 384 /* Check 0001 and 1110 cases. */ 385 if (vi_alpha_is_on_msb(sscreen, surface_format)) { 386 if (desc->nr_channels == 2 && desc->channel[0].size == 8) { 387 if (value.ub[0] == 0x00 && value.ub[1] == 0xff) { 388 *clear_value = GFX11_DCC_CLEAR_0001_UNORM; 389 return true; 390 } else if (value.ub[0] == 0xff && value.ub[1] == 0x00) { 391 *clear_value = GFX11_DCC_CLEAR_1110_UNORM; 392 return true; 393 } 394 } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) { 395 if (value.ub[0] == 0x00 && value.ub[1] == 0x00 && 396 value.ub[2] == 0x00 && value.ub[3] == 0xff) { 397 *clear_value = GFX11_DCC_CLEAR_0001_UNORM; 398 return true; 399 } else if (value.ub[0] == 0xff && value.ub[1] == 0xff && 400 value.ub[2] == 0xff && value.ub[3] == 0x00) { 401 *clear_value = GFX11_DCC_CLEAR_1110_UNORM; 402 return true; 403 } 404 } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) { 405 if (value.us[0] == 0x0000 && value.us[1] == 0x0000 && 406 value.us[2] == 0x0000 && value.us[3] == 0xffff) { 407 *clear_value = GFX11_DCC_CLEAR_0001_UNORM; 408 return true; 409 } else if (value.us[0] == 0xffff && value.us[1] == 0xffff && 410 value.us[2] == 0xffff && value.us[3] == 0x0000) { 411 *clear_value = GFX11_DCC_CLEAR_1110_UNORM; 412 return true; 413 } 414 } 415 } 416 417 return false; 418} 419 420bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsigned level, 421 unsigned clear_value, struct si_clear_info *out) 422{ 423 struct pipe_resource *dcc_buffer = &tex->buffer.b.b; 424 uint64_t dcc_offset = tex->surface.meta_offset; 425 uint32_t clear_size; 426 427 assert(vi_dcc_enabled(tex, level)); 428 429 if (sctx->gfx_level >= GFX10) { 430 /* 4x and 8x MSAA needs a sophisticated compute shader for 431 * the clear. GFX11 doesn't need that. 432 */ 433 if (sctx->gfx_level < GFX11 && tex->buffer.b.b.nr_storage_samples >= 4) 434 return false; 435 436 unsigned num_layers = util_num_layers(&tex->buffer.b.b, level); 437 438 if (num_layers == 1) { 439 /* Clear a specific level. */ 440 dcc_offset += tex->surface.u.gfx9.meta_levels[level].offset; 441 clear_size = tex->surface.u.gfx9.meta_levels[level].size; 442 } else if (tex->buffer.b.b.last_level == 0) { 443 /* Clear all layers having only 1 level. */ 444 clear_size = tex->surface.meta_size; 445 } else { 446 /* Clearing DCC with both multiple levels and multiple layers is not 447 * implemented. 448 */ 449 return false; 450 } 451 } else if (sctx->gfx_level == GFX9) { 452 /* TODO: Implement DCC fast clear for level 0 of mipmapped textures. Mipmapped 453 * DCC has to clear a rectangular area of DCC for level 0 (because the whole miptree 454 * is organized in a 2D plane). 455 */ 456 if (tex->buffer.b.b.last_level > 0) 457 return false; 458 459 /* 4x and 8x MSAA need to clear only sample 0 and 1 in a compute shader and leave other 460 * samples untouched. (only the first 2 samples are compressed) */ 461 if (tex->buffer.b.b.nr_storage_samples >= 4) { 462 si_init_buffer_clear(out, dcc_buffer, 0, 0, clear_value); 463 out->is_dcc_msaa = true; 464 return true; 465 } 466 467 clear_size = tex->surface.meta_size; 468 } else { 469 unsigned num_layers = util_num_layers(&tex->buffer.b.b, level); 470 471 /* If this is 0, fast clear isn't possible. (can occur with MSAA) */ 472 if (!tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size) 473 return false; 474 475 /* Layered 4x and 8x MSAA DCC fast clears need to clear 476 * dcc_fast_clear_size bytes for each layer. A compute shader 477 * would be more efficient than separate per-layer clear operations. 478 */ 479 if (tex->buffer.b.b.nr_storage_samples >= 4 && num_layers > 1) 480 return false; 481 482 dcc_offset += tex->surface.u.legacy.color.dcc_level[level].dcc_offset; 483 clear_size = tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size; 484 } 485 486 si_init_buffer_clear(out, dcc_buffer, dcc_offset, clear_size, clear_value); 487 return true; 488} 489 490/* Set the same micro tile mode as the destination of the last MSAA resolve. 491 * This allows hitting the MSAA resolve fast path, which requires that both 492 * src and dst micro tile modes match. 493 */ 494static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_texture *tex) 495{ 496 if (sscreen->info.gfx_level >= GFX10 || tex->buffer.b.is_shared || 497 tex->buffer.b.b.nr_samples <= 1 || 498 tex->surface.micro_tile_mode == tex->last_msaa_resolve_target_micro_mode) 499 return; 500 501 assert(sscreen->info.gfx_level >= GFX9 || 502 tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D); 503 assert(tex->buffer.b.b.last_level == 0); 504 505 if (sscreen->info.gfx_level >= GFX9) { 506 /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */ 507 assert(tex->surface.u.gfx9.swizzle_mode >= 4); 508 509 /* If you do swizzle_mode % 4, you'll get: 510 * 0 = Depth 511 * 1 = Standard, 512 * 2 = Displayable 513 * 3 = Rotated 514 * 515 * Depth-sample order isn't allowed: 516 */ 517 assert(tex->surface.u.gfx9.swizzle_mode % 4 != 0); 518 519 switch (tex->last_msaa_resolve_target_micro_mode) { 520 case RADEON_MICRO_MODE_DISPLAY: 521 tex->surface.u.gfx9.swizzle_mode &= ~0x3; 522 tex->surface.u.gfx9.swizzle_mode += 2; /* D */ 523 break; 524 case RADEON_MICRO_MODE_STANDARD: 525 tex->surface.u.gfx9.swizzle_mode &= ~0x3; 526 tex->surface.u.gfx9.swizzle_mode += 1; /* S */ 527 break; 528 case RADEON_MICRO_MODE_RENDER: 529 tex->surface.u.gfx9.swizzle_mode &= ~0x3; 530 tex->surface.u.gfx9.swizzle_mode += 3; /* R */ 531 break; 532 default: /* depth */ 533 assert(!"unexpected micro mode"); 534 return; 535 } 536 } else if (sscreen->info.gfx_level >= GFX7) { 537 /* These magic numbers were copied from addrlib. It doesn't use 538 * any definitions for them either. They are all 2D_TILED_THIN1 539 * modes with different bpp and micro tile mode. 540 */ 541 switch (tex->last_msaa_resolve_target_micro_mode) { 542 case RADEON_MICRO_MODE_DISPLAY: 543 tex->surface.u.legacy.tiling_index[0] = 10; 544 break; 545 case RADEON_MICRO_MODE_STANDARD: 546 tex->surface.u.legacy.tiling_index[0] = 14; 547 break; 548 case RADEON_MICRO_MODE_RENDER: 549 tex->surface.u.legacy.tiling_index[0] = 28; 550 break; 551 default: /* depth, thick */ 552 assert(!"unexpected micro mode"); 553 return; 554 } 555 } else { /* GFX6 */ 556 switch (tex->last_msaa_resolve_target_micro_mode) { 557 case RADEON_MICRO_MODE_DISPLAY: 558 switch (tex->surface.bpe) { 559 case 1: 560 tex->surface.u.legacy.tiling_index[0] = 10; 561 break; 562 case 2: 563 tex->surface.u.legacy.tiling_index[0] = 11; 564 break; 565 default: /* 4, 8 */ 566 tex->surface.u.legacy.tiling_index[0] = 12; 567 break; 568 } 569 break; 570 case RADEON_MICRO_MODE_STANDARD: 571 switch (tex->surface.bpe) { 572 case 1: 573 tex->surface.u.legacy.tiling_index[0] = 14; 574 break; 575 case 2: 576 tex->surface.u.legacy.tiling_index[0] = 15; 577 break; 578 case 4: 579 tex->surface.u.legacy.tiling_index[0] = 16; 580 break; 581 default: /* 8, 16 */ 582 tex->surface.u.legacy.tiling_index[0] = 17; 583 break; 584 } 585 break; 586 default: /* depth, thick */ 587 assert(!"unexpected micro mode"); 588 return; 589 } 590 } 591 592 tex->surface.micro_tile_mode = tex->last_msaa_resolve_target_micro_mode; 593 594 p_atomic_inc(&sscreen->dirty_tex_counter); 595} 596 597static uint32_t si_get_htile_clear_value(struct si_texture *tex, float depth) 598{ 599 /* Maximum 14-bit UINT value. */ 600 const uint32_t max_z_value = 0x3FFF; 601 602 /* For clears, Zmask and Smem will always be set to zero. */ 603 const uint32_t zmask = 0; 604 const uint32_t smem = 0; 605 606 /* Convert depthValue to 14-bit zmin/zmax uint values. */ 607 const uint32_t zmin = lroundf(depth * max_z_value); 608 const uint32_t zmax = zmin; 609 610 if (tex->htile_stencil_disabled) { 611 /* Z-only HTILE is laid out as follows: 612 * |31 18|17 4|3 0| 613 * +---------+---------+-------+ 614 * | Max Z | Min Z | ZMask | 615 */ 616 return ((zmax & 0x3FFF) << 18) | 617 ((zmin & 0x3FFF) << 4) | 618 ((zmask & 0xF) << 0); 619 } else { 620 /* Z+S HTILE is laid out as-follows: 621 * |31 12|11 10|9 8|7 6|5 4|3 0| 622 * +-----------+-----+------+-----+-----+-------+ 623 * | Z Range | | SMem | SR1 | SR0 | ZMask | 624 * 625 * The base value for zRange is either zMax or zMin, depending on ZRANGE_PRECISION. 626 * For a fast clear, zMin == zMax == clearValue. This means that the base will 627 * always be the clear value (converted to 14-bit UINT). 628 * 629 * When abs(zMax-zMin) < 16, the delta is equal to the difference. In the case of 630 * fast clears, where zMax == zMin, the delta is always zero. 631 */ 632 const uint32_t delta = 0; 633 const uint32_t zrange = (zmax << 6) | delta; 634 635 /* SResults 0 & 1 are set based on the stencil compare state. 636 * For fast-clear, the default value of sr0 and sr1 are both 0x3. 637 */ 638 const uint32_t sresults = 0xf; 639 640 return ((zrange & 0xFFFFF) << 12) | 641 ((smem & 0x3) << 8) | 642 ((sresults & 0xF) << 4) | 643 ((zmask & 0xF) << 0); 644 } 645} 646 647static bool si_can_fast_clear_depth(struct si_texture *zstex, unsigned level, float depth, 648 unsigned buffers) 649{ 650 /* TC-compatible HTILE only supports depth clears to 0 or 1. */ 651 return buffers & PIPE_CLEAR_DEPTH && 652 si_htile_enabled(zstex, level, PIPE_MASK_Z) && 653 (!zstex->tc_compatible_htile || depth == 0 || depth == 1); 654} 655 656static bool si_can_fast_clear_stencil(struct si_texture *zstex, unsigned level, uint8_t stencil, 657 unsigned buffers) 658{ 659 /* TC-compatible HTILE only supports stencil clears to 0. */ 660 return buffers & PIPE_CLEAR_STENCIL && 661 si_htile_enabled(zstex, level, PIPE_MASK_S) && 662 (!zstex->tc_compatible_htile || stencil == 0); 663} 664 665static void si_fast_clear(struct si_context *sctx, unsigned *buffers, 666 const union pipe_color_union *color, float depth, uint8_t stencil) 667{ 668 struct pipe_framebuffer_state *fb = &sctx->framebuffer.state; 669 struct si_clear_info info[8 * 2 + 1]; /* MRTs * (CMASK + DCC) + ZS */ 670 unsigned num_clears = 0; 671 unsigned clear_types = 0; 672 unsigned num_pixels = fb->width * fb->height; 673 674 /* This function is broken in BE, so just disable this path for now */ 675#if UTIL_ARCH_BIG_ENDIAN 676 return; 677#endif 678 679 if (sctx->render_cond) 680 return; 681 682 /* Gather information about what to clear. */ 683 unsigned color_buffer_mask = (*buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0); 684 while (color_buffer_mask) { 685 unsigned i = u_bit_scan(&color_buffer_mask); 686 687 struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture; 688 unsigned level = fb->cbufs[i]->u.tex.level; 689 unsigned num_layers = util_num_layers(&tex->buffer.b.b, level); 690 691 /* the clear is allowed if all layers are bound */ 692 if (fb->cbufs[i]->u.tex.first_layer != 0 || 693 fb->cbufs[i]->u.tex.last_layer != num_layers - 1) { 694 continue; 695 } 696 697 /* We can change the micro tile mode before a full clear. */ 698 /* This is only used for MSAA textures when clearing all layers. */ 699 si_set_optimal_micro_tile_mode(sctx->screen, tex); 700 701 if (tex->swap_rgb_to_bgr_on_next_clear) { 702 assert(!tex->swap_rgb_to_bgr); 703 assert(tex->buffer.b.b.nr_samples >= 2); 704 tex->swap_rgb_to_bgr = true; 705 tex->swap_rgb_to_bgr_on_next_clear = false; 706 707 /* Update all sampler views and images. */ 708 p_atomic_inc(&sctx->screen->dirty_tex_counter); 709 } 710 711 /* only supported on tiled surfaces */ 712 if (tex->surface.is_linear) { 713 continue; 714 } 715 716 /* Use a slow clear for small surfaces where the cost of 717 * the eliminate pass can be higher than the benefit of fast 718 * clear. The closed driver does this, but the numbers may differ. 719 * 720 * This helps on both dGPUs and APUs, even small APUs like Mullins. 721 */ 722 bool fb_too_small = num_pixels * num_layers <= 512 * 512; 723 bool too_small = tex->buffer.b.b.nr_samples <= 1 && fb_too_small; 724 bool eliminate_needed = false; 725 bool fmask_decompress_needed = false; 726 727 /* Try to clear DCC first, otherwise try CMASK. */ 728 if (vi_dcc_enabled(tex, level)) { 729 uint32_t reset_value; 730 731 if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR)) 732 continue; 733 734 if (sctx->gfx_level >= GFX11) { 735 if (!gfx11_get_dcc_clear_parameters(sctx->screen, fb->cbufs[i]->format, color, 736 &reset_value)) 737 continue; 738 } else { 739 if (!gfx8_get_dcc_clear_parameters(sctx->screen, tex->buffer.b.b.format, 740 fb->cbufs[i]->format, color, &reset_value, 741 &eliminate_needed)) 742 continue; 743 } 744 745 /* Shared textures can't use fast clear without an explicit flush 746 * because the clear color is not exported. 747 * 748 * Chips without DCC constant encoding must set the clear color registers 749 * correctly even if the fast clear eliminate pass is not needed. 750 */ 751 if ((eliminate_needed || !sctx->screen->info.has_dcc_constant_encode) && 752 tex->buffer.b.is_shared && 753 !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) 754 continue; 755 756 if (eliminate_needed && too_small) 757 continue; 758 759 /* We can clear any level, but we only set up the clear value registers for the first 760 * level. Therefore, all other levels can be cleared only if the clear value registers 761 * are not used, which is only the case with DCC constant encoding and 0/1 clear values. 762 */ 763 if (level > 0 && (eliminate_needed || !sctx->screen->info.has_dcc_constant_encode)) 764 continue; 765 766 if (tex->buffer.b.b.nr_samples >= 2 && eliminate_needed && 767 !sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)]) 768 continue; 769 770 assert(num_clears < ARRAY_SIZE(info)); 771 772 if (!vi_dcc_get_clear_info(sctx, tex, level, reset_value, &info[num_clears])) 773 continue; 774 775 num_clears++; 776 clear_types |= SI_CLEAR_TYPE_DCC; 777 778 si_mark_display_dcc_dirty(sctx, tex); 779 780 /* DCC fast clear with MSAA should clear CMASK to 0xC. */ 781 if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) { 782 assert(sctx->gfx_level < GFX11); /* no FMASK/CMASK on GFX11 */ 783 assert(num_clears < ARRAY_SIZE(info)); 784 si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b, 785 tex->surface.cmask_offset, tex->surface.cmask_size, 0xCCCCCCCC); 786 clear_types |= SI_CLEAR_TYPE_CMASK; 787 fmask_decompress_needed = true; 788 } 789 } else { 790 /* No CMASK on GFX11. */ 791 if (sctx->gfx_level >= GFX11) 792 continue; 793 794 if (level > 0) 795 continue; 796 797 /* Shared textures can't use fast clear without an explicit flush 798 * because the clear color is not exported. 799 */ 800 if (tex->buffer.b.is_shared && 801 !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH)) 802 continue; 803 804 if (too_small) 805 continue; 806 807 /* 128-bit formats are unsupported */ 808 if (tex->surface.bpe > 8) { 809 continue; 810 } 811 812 /* RB+ doesn't work with CMASK fast clear on Stoney. */ 813 if (sctx->family == CHIP_STONEY) 814 continue; 815 816 /* Disable fast clear if tex is encrypted */ 817 if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED) 818 continue; 819 820 uint64_t cmask_offset = 0; 821 unsigned clear_size = 0; 822 823 if (sctx->gfx_level >= GFX10) { 824 assert(level == 0); 825 826 /* Clearing CMASK with both multiple levels and multiple layers is not 827 * implemented. 828 */ 829 if (num_layers > 1 && tex->buffer.b.b.last_level > 0) 830 continue; 831 832 if (!si_alloc_separate_cmask(sctx->screen, tex)) 833 continue; 834 835 if (num_layers == 1) { 836 /* Clear level 0. */ 837 cmask_offset = tex->surface.cmask_offset + tex->surface.u.gfx9.color.cmask_level0.offset; 838 clear_size = tex->surface.u.gfx9.color.cmask_level0.size; 839 } else if (tex->buffer.b.b.last_level == 0) { 840 /* Clear all layers having only 1 level. */ 841 cmask_offset = tex->surface.cmask_offset; 842 clear_size = tex->surface.cmask_size; 843 } else { 844 assert(0); /* this is prevented above */ 845 } 846 } else if (sctx->gfx_level == GFX9) { 847 /* TODO: Implement CMASK fast clear for level 0 of mipmapped textures. Mipmapped 848 * CMASK has to clear a rectangular area of CMASK for level 0 (because the whole 849 * miptree is organized in a 2D plane). 850 */ 851 if (tex->buffer.b.b.last_level > 0) 852 continue; 853 854 if (!si_alloc_separate_cmask(sctx->screen, tex)) 855 continue; 856 857 cmask_offset = tex->surface.cmask_offset; 858 clear_size = tex->surface.cmask_size; 859 } else { 860 if (!si_alloc_separate_cmask(sctx->screen, tex)) 861 continue; 862 863 /* GFX6-8: This only covers mipmap level 0. */ 864 cmask_offset = tex->surface.cmask_offset; 865 clear_size = tex->surface.cmask_size; 866 } 867 868 /* Do the fast clear. */ 869 assert(num_clears < ARRAY_SIZE(info)); 870 si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b, 871 cmask_offset, clear_size, 0); 872 clear_types |= SI_CLEAR_TYPE_CMASK; 873 eliminate_needed = true; 874 } 875 876 if ((eliminate_needed || fmask_decompress_needed) && 877 !(tex->dirty_level_mask & (1 << level))) { 878 assert(sctx->gfx_level < GFX11); /* no decompression needed on GFX11 */ 879 tex->dirty_level_mask |= 1 << level; 880 si_set_sampler_depth_decompress_mask(sctx, tex); 881 p_atomic_inc(&sctx->screen->compressed_colortex_counter); 882 } 883 884 *buffers &= ~(PIPE_CLEAR_COLOR0 << i); 885 886 /* Chips with DCC constant encoding don't need to set the clear 887 * color registers for DCC clear values 0 and 1. 888 */ 889 if (sctx->screen->info.has_dcc_constant_encode && !eliminate_needed) 890 continue; 891 892 /* There are no clear color registers on GFX11. */ 893 assert(sctx->gfx_level < GFX11); 894 895 if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) { 896 sctx->framebuffer.dirty_cbufs |= 1 << i; 897 si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 898 } 899 } 900 901 /* Depth/stencil clears. */ 902 struct pipe_surface *zsbuf = fb->zsbuf; 903 struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL; 904 unsigned zs_num_layers = zstex ? util_num_layers(&zstex->buffer.b.b, zsbuf->u.tex.level) : 0; 905 906 if (zstex && zsbuf->u.tex.first_layer == 0 && 907 zsbuf->u.tex.last_layer == zs_num_layers - 1 && 908 si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS)) { 909 unsigned level = zsbuf->u.tex.level; 910 bool update_db_depth_clear = false; 911 bool update_db_stencil_clear = false; 912 bool fb_too_small = num_pixels * zs_num_layers <= 512 * 512; 913 914 /* Transition from TC-incompatible to TC-compatible HTILE if requested. */ 915 if (zstex->enable_tc_compatible_htile_next_clear) { 916 /* If both depth and stencil are present, they must be cleared together. */ 917 if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL || 918 (*buffers & PIPE_CLEAR_DEPTH && (!zstex->surface.has_stencil || 919 zstex->htile_stencil_disabled))) { 920 /* The conversion from TC-incompatible to TC-compatible can only be done in one clear. */ 921 assert(zstex->buffer.b.b.last_level == 0); 922 assert(!zstex->tc_compatible_htile); 923 924 /* Enable TC-compatible HTILE. */ 925 zstex->enable_tc_compatible_htile_next_clear = false; 926 zstex->tc_compatible_htile = true; 927 928 /* Update the framebuffer state to reflect the change. */ 929 sctx->framebuffer.DB_has_shader_readable_metadata = true; 930 sctx->framebuffer.dirty_zsbuf = true; 931 si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 932 933 /* Update all sampler views and shader images in all contexts. */ 934 p_atomic_inc(&sctx->screen->dirty_tex_counter); 935 936 /* Perform the clear here if possible, else clear to uncompressed. */ 937 uint32_t clear_value; 938 939 if (zstex->htile_stencil_disabled || !zstex->surface.has_stencil) { 940 if (si_can_fast_clear_depth(zstex, level, depth, *buffers)) { 941 /* Z-only clear. */ 942 clear_value = si_get_htile_clear_value(zstex, depth); 943 *buffers &= ~PIPE_CLEAR_DEPTH; 944 zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level); 945 zstex->depth_cleared_level_mask |= BITFIELD_BIT(level); 946 update_db_depth_clear = true; 947 } 948 } else if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { 949 if (si_can_fast_clear_depth(zstex, level, depth, *buffers) && 950 si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) { 951 /* Combined Z+S clear. */ 952 clear_value = si_get_htile_clear_value(zstex, depth); 953 *buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; 954 zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level); 955 zstex->depth_cleared_level_mask |= BITFIELD_BIT(level); 956 zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level); 957 update_db_depth_clear = true; 958 update_db_stencil_clear = true; 959 } 960 } 961 962 if (!update_db_depth_clear) { 963 /* Clear to uncompressed, so that it doesn't contain values incompatible 964 * with the new TC-compatible HTILE setting. 965 * 966 * 0xfffff30f = uncompressed Z + S 967 * 0xfffc000f = uncompressed Z only 968 */ 969 clear_value = !zstex->htile_stencil_disabled ? 0xfffff30f : 0xfffc000f; 970 } 971 972 zstex->need_flush_after_depth_decompression = sctx->gfx_level == GFX10_3; 973 974 assert(num_clears < ARRAY_SIZE(info)); 975 si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, 976 zstex->surface.meta_offset, zstex->surface.meta_size, clear_value); 977 clear_types |= SI_CLEAR_TYPE_HTILE; 978 } 979 } else if (num_clears || !fb_too_small) { 980 /* This is where the HTILE buffer clear is done. 981 * 982 * If there is no clear scheduled and the framebuffer size is too small, we should use 983 * the draw-based clear that is without waits. If there is some other clear scheduled, 984 * we will have to wait anyway, so add the HTILE buffer clear to the batch here. 985 * If the framebuffer size is large enough, use this codepath too. 986 */ 987 uint64_t htile_offset = zstex->surface.meta_offset; 988 unsigned htile_size = 0; 989 990 /* Determine the HTILE subset to clear. */ 991 if (sctx->gfx_level >= GFX10) { 992 /* This can only clear a layered texture with 1 level or a mipmap texture 993 * with 1 layer. Other cases are unimplemented. 994 */ 995 if (zs_num_layers == 1) { 996 /* Clear a specific level. */ 997 htile_offset += zstex->surface.u.gfx9.meta_levels[level].offset; 998 htile_size = zstex->surface.u.gfx9.meta_levels[level].size; 999 } else if (zstex->buffer.b.b.last_level == 0) { 1000 /* Clear all layers having only 1 level. */ 1001 htile_size = zstex->surface.meta_size; 1002 } 1003 } else { 1004 /* This can only clear a layered texture with 1 level. Other cases are 1005 * unimplemented. 1006 */ 1007 if (zstex->buffer.b.b.last_level == 0) 1008 htile_size = zstex->surface.meta_size; 1009 } 1010 1011 /* Perform the clear if it's possible. */ 1012 if (zstex->htile_stencil_disabled || !zstex->surface.has_stencil) { 1013 if (htile_size && 1014 si_can_fast_clear_depth(zstex, level, depth, *buffers)) { 1015 /* Z-only clear. */ 1016 assert(num_clears < ARRAY_SIZE(info)); 1017 si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset, 1018 htile_size, si_get_htile_clear_value(zstex, depth)); 1019 clear_types |= SI_CLEAR_TYPE_HTILE; 1020 *buffers &= ~PIPE_CLEAR_DEPTH; 1021 zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level); 1022 zstex->depth_cleared_level_mask |= BITFIELD_BIT(level); 1023 update_db_depth_clear = true; 1024 } 1025 } else if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) { 1026 if (htile_size && 1027 si_can_fast_clear_depth(zstex, level, depth, *buffers) && 1028 si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) { 1029 /* Combined Z+S clear. */ 1030 assert(num_clears < ARRAY_SIZE(info)); 1031 si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset, 1032 htile_size, si_get_htile_clear_value(zstex, depth)); 1033 clear_types |= SI_CLEAR_TYPE_HTILE; 1034 *buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; 1035 zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level); 1036 zstex->depth_cleared_level_mask |= BITFIELD_BIT(level); 1037 zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level); 1038 update_db_depth_clear = true; 1039 update_db_stencil_clear = true; 1040 } 1041 } else { 1042 /* Z-only or S-only clear when both Z/S are present using a read-modify-write 1043 * compute shader. 1044 * 1045 * If we get both clears but only one of them can be fast-cleared, we use 1046 * the draw-based fast clear to do both at the same time. 1047 */ 1048 const uint32_t htile_depth_writemask = 0xfffffc0f; 1049 const uint32_t htile_stencil_writemask = 0x000003f0; 1050 1051 if (htile_size && 1052 !(*buffers & PIPE_CLEAR_STENCIL) && 1053 si_can_fast_clear_depth(zstex, level, depth, *buffers)) { 1054 /* Z-only clear with stencil left intact. */ 1055 assert(num_clears < ARRAY_SIZE(info)); 1056 si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset, 1057 htile_size, si_get_htile_clear_value(zstex, depth), 1058 htile_depth_writemask); 1059 clear_types |= SI_CLEAR_TYPE_HTILE; 1060 *buffers &= ~PIPE_CLEAR_DEPTH; 1061 zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level); 1062 zstex->depth_cleared_level_mask |= BITFIELD_BIT(level); 1063 update_db_depth_clear = true; 1064 } else if (htile_size && 1065 !(*buffers & PIPE_CLEAR_DEPTH) && 1066 si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) { 1067 /* Stencil-only clear with depth left intact. */ 1068 assert(num_clears < ARRAY_SIZE(info)); 1069 si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset, 1070 htile_size, si_get_htile_clear_value(zstex, depth), 1071 htile_stencil_writemask); 1072 clear_types |= SI_CLEAR_TYPE_HTILE; 1073 *buffers &= ~PIPE_CLEAR_STENCIL; 1074 zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level); 1075 update_db_stencil_clear = true; 1076 } 1077 } 1078 1079 zstex->need_flush_after_depth_decompression = update_db_depth_clear && sctx->gfx_level == GFX10_3; 1080 1081 /* Update DB_DEPTH_CLEAR. */ 1082 if (update_db_depth_clear && 1083 zstex->depth_clear_value[level] != (float)depth) { 1084 zstex->depth_clear_value[level] = depth; 1085 sctx->framebuffer.dirty_zsbuf = true; 1086 si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 1087 } 1088 1089 /* Update DB_STENCIL_CLEAR. */ 1090 if (update_db_stencil_clear && 1091 zstex->stencil_clear_value[level] != stencil) { 1092 zstex->stencil_clear_value[level] = stencil; 1093 sctx->framebuffer.dirty_zsbuf = true; 1094 si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 1095 } 1096 } 1097 } 1098 1099 si_execute_clears(sctx, info, num_clears, clear_types); 1100} 1101 1102static void si_clear(struct pipe_context *ctx, unsigned buffers, 1103 const struct pipe_scissor_state *scissor_state, 1104 const union pipe_color_union *color, double depth, unsigned stencil) 1105{ 1106 struct si_context *sctx = (struct si_context *)ctx; 1107 struct pipe_framebuffer_state *fb = &sctx->framebuffer.state; 1108 struct pipe_surface *zsbuf = fb->zsbuf; 1109 struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL; 1110 bool needs_db_flush = false; 1111 1112 /* Unset clear flags for non-existent buffers. */ 1113 for (unsigned i = 0; i < 8; i++) { 1114 if (i >= fb->nr_cbufs || !fb->cbufs[i]) 1115 buffers &= ~(PIPE_CLEAR_COLOR0 << i); 1116 } 1117 if (!zsbuf) 1118 buffers &= ~PIPE_CLEAR_DEPTHSTENCIL; 1119 else if (!util_format_has_stencil(util_format_description(zsbuf->format))) 1120 buffers &= ~PIPE_CLEAR_STENCIL; 1121 1122 si_fast_clear(sctx, &buffers, color, depth, stencil); 1123 if (!buffers) 1124 return; /* all buffers have been cleared */ 1125 1126 if (buffers & PIPE_CLEAR_COLOR) { 1127 /* These buffers cannot use fast clear, make sure to disable expansion. */ 1128 unsigned color_buffer_mask = (buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0); 1129 while (color_buffer_mask) { 1130 unsigned i = u_bit_scan(&color_buffer_mask); 1131 struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture; 1132 if (tex->surface.fmask_size == 0) 1133 tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level); 1134 } 1135 } 1136 1137 if (zstex && zsbuf->u.tex.first_layer == 0 && 1138 zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) { 1139 unsigned level = zsbuf->u.tex.level; 1140 1141 if (si_can_fast_clear_depth(zstex, level, depth, buffers)) { 1142 /* Need to disable EXPCLEAR temporarily if clearing 1143 * to a new value. */ 1144 if (!(zstex->depth_cleared_level_mask_once & BITFIELD_BIT(level)) || 1145 zstex->depth_clear_value[level] != depth) { 1146 sctx->db_depth_disable_expclear = true; 1147 } 1148 1149 if (zstex->depth_clear_value[level] != (float)depth) { 1150 if ((zstex->depth_clear_value[level] != 0) != (depth != 0)) { 1151 /* ZRANGE_PRECISION register of a bound surface will change so we 1152 * must flush the DB caches. */ 1153 needs_db_flush = true; 1154 } 1155 /* Update DB_DEPTH_CLEAR. */ 1156 zstex->depth_clear_value[level] = depth; 1157 sctx->framebuffer.dirty_zsbuf = true; 1158 si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 1159 } 1160 sctx->db_depth_clear = true; 1161 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1162 } 1163 1164 if (si_can_fast_clear_stencil(zstex, level, stencil, buffers)) { 1165 stencil &= 0xff; 1166 1167 /* Need to disable EXPCLEAR temporarily if clearing 1168 * to a new value. */ 1169 if (!(zstex->stencil_cleared_level_mask_once & BITFIELD_BIT(level)) || 1170 zstex->stencil_clear_value[level] != stencil) { 1171 sctx->db_stencil_disable_expclear = true; 1172 } 1173 1174 if (zstex->stencil_clear_value[level] != (uint8_t)stencil) { 1175 /* Update DB_STENCIL_CLEAR. */ 1176 zstex->stencil_clear_value[level] = stencil; 1177 sctx->framebuffer.dirty_zsbuf = true; 1178 si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer); 1179 } 1180 sctx->db_stencil_clear = true; 1181 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1182 } 1183 1184 if (needs_db_flush) 1185 sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB; 1186 } 1187 1188 if (unlikely(sctx->thread_trace_enabled)) { 1189 if (buffers & PIPE_CLEAR_COLOR) 1190 sctx->sqtt_next_event = EventCmdClearColorImage; 1191 else if (buffers & PIPE_CLEAR_DEPTHSTENCIL) 1192 sctx->sqtt_next_event = EventCmdClearDepthStencilImage; 1193 } 1194 1195 si_blitter_begin(sctx, SI_CLEAR); 1196 util_blitter_clear(sctx->blitter, fb->width, fb->height, util_framebuffer_get_num_layers(fb), 1197 buffers, color, depth, stencil, sctx->framebuffer.nr_samples > 1); 1198 si_blitter_end(sctx); 1199 1200 if (sctx->db_depth_clear) { 1201 sctx->db_depth_clear = false; 1202 sctx->db_depth_disable_expclear = false; 1203 zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(zsbuf->u.tex.level); 1204 zstex->depth_cleared_level_mask |= BITFIELD_BIT(zsbuf->u.tex.level); 1205 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1206 } 1207 1208 if (sctx->db_stencil_clear) { 1209 sctx->db_stencil_clear = false; 1210 sctx->db_stencil_disable_expclear = false; 1211 zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(zsbuf->u.tex.level); 1212 si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state); 1213 } 1214} 1215 1216static bool si_try_normal_clear(struct si_context *sctx, struct pipe_surface *dst, 1217 unsigned dstx, unsigned dsty, unsigned width, unsigned height, 1218 bool render_condition_enabled, unsigned buffers, 1219 const union pipe_color_union *color, 1220 float depth, unsigned stencil) 1221{ 1222 /* This is worth it only if it's a whole image clear, so that we just clear DCC/HTILE. */ 1223 if (dstx == 0 && dsty == 0 && 1224 width == dst->width && 1225 height == dst->height && 1226 dst->u.tex.first_layer == 0 && 1227 dst->u.tex.last_layer == util_max_layer(dst->texture, dst->u.tex.level) && 1228 /* pipe->clear honors render_condition, so only use it if it's unset or if it's set and enabled. */ 1229 (!sctx->render_cond || render_condition_enabled) && 1230 sctx->has_graphics) { 1231 struct pipe_context *ctx = &sctx->b; 1232 struct pipe_framebuffer_state saved_fb = {}, fb = {}; 1233 1234 util_copy_framebuffer_state(&saved_fb, &sctx->framebuffer.state); 1235 1236 if (buffers & PIPE_CLEAR_COLOR) { 1237 fb.cbufs[0] = dst; 1238 fb.nr_cbufs = 1; 1239 } else { 1240 fb.zsbuf = dst; 1241 } 1242 1243 fb.width = dst->width; 1244 fb.height = dst->height; 1245 1246 ctx->set_framebuffer_state(ctx, &fb); 1247 ctx->clear(ctx, buffers, NULL, color, depth, stencil); 1248 ctx->set_framebuffer_state(ctx, &saved_fb); 1249 1250 util_copy_framebuffer_state(&saved_fb, NULL); 1251 1252 return true; 1253 } 1254 1255 return false; 1256} 1257 1258static void si_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dst, 1259 const union pipe_color_union *color, unsigned dstx, 1260 unsigned dsty, unsigned width, unsigned height, 1261 bool render_condition_enabled) 1262{ 1263 struct si_context *sctx = (struct si_context *)ctx; 1264 struct si_texture *sdst = (struct si_texture *)dst->texture; 1265 1266 /* Fast path that just clears DCC. */ 1267 if (si_try_normal_clear(sctx, dst, dstx, dsty, width, height, render_condition_enabled, 1268 PIPE_CLEAR_COLOR0, color, 0, 0)) 1269 return; 1270 1271 if (dst->texture->nr_samples <= 1 && 1272 (sctx->gfx_level >= GFX10 || !vi_dcc_enabled(sdst, dst->u.tex.level))) { 1273 si_compute_clear_render_target(ctx, dst, color, dstx, dsty, width, height, 1274 render_condition_enabled); 1275 return; 1276 } 1277 1278 si_blitter_begin(sctx, 1279 SI_CLEAR_SURFACE | (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND)); 1280 util_blitter_clear_render_target(sctx->blitter, dst, color, dstx, dsty, width, height); 1281 si_blitter_end(sctx); 1282} 1283 1284static void si_clear_depth_stencil(struct pipe_context *ctx, struct pipe_surface *dst, 1285 unsigned clear_flags, double depth, unsigned stencil, 1286 unsigned dstx, unsigned dsty, unsigned width, unsigned height, 1287 bool render_condition_enabled) 1288{ 1289 struct si_context *sctx = (struct si_context *)ctx; 1290 union pipe_color_union unused = {}; 1291 1292 /* Fast path that just clears HTILE. */ 1293 if (si_try_normal_clear(sctx, dst, dstx, dsty, width, height, render_condition_enabled, 1294 clear_flags, &unused, depth, stencil)) 1295 return; 1296 1297 si_blitter_begin(sctx, 1298 SI_CLEAR_SURFACE | (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND)); 1299 util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty, 1300 width, height); 1301 si_blitter_end(sctx); 1302} 1303 1304static void si_clear_texture(struct pipe_context *pipe, struct pipe_resource *tex, unsigned level, 1305 const struct pipe_box *box, const void *data) 1306{ 1307 struct pipe_screen *screen = pipe->screen; 1308 struct si_texture *stex = (struct si_texture *)tex; 1309 struct pipe_surface tmpl = {{0}}; 1310 struct pipe_surface *sf; 1311 1312 tmpl.format = tex->format; 1313 tmpl.u.tex.first_layer = box->z; 1314 tmpl.u.tex.last_layer = box->z + box->depth - 1; 1315 tmpl.u.tex.level = level; 1316 sf = pipe->create_surface(pipe, tex, &tmpl); 1317 if (!sf) 1318 return; 1319 1320 if (stex->is_depth) { 1321 unsigned clear; 1322 float depth; 1323 uint8_t stencil = 0; 1324 1325 /* Depth is always present. */ 1326 clear = PIPE_CLEAR_DEPTH; 1327 util_format_unpack_z_float(tex->format, &depth, data, 1); 1328 1329 if (stex->surface.has_stencil) { 1330 clear |= PIPE_CLEAR_STENCIL; 1331 util_format_unpack_s_8uint(tex->format, &stencil, data, 1); 1332 } 1333 1334 si_clear_depth_stencil(pipe, sf, clear, depth, stencil, box->x, box->y, box->width, 1335 box->height, false); 1336 } else { 1337 union pipe_color_union color; 1338 1339 util_format_unpack_rgba(tex->format, color.ui, data, 1); 1340 1341 if (screen->is_format_supported(screen, tex->format, tex->target, 0, 0, 1342 PIPE_BIND_RENDER_TARGET)) { 1343 si_clear_render_target(pipe, sf, &color, box->x, box->y, box->width, box->height, false); 1344 } else { 1345 /* Software fallback - just for R9G9B9E5_FLOAT */ 1346 util_clear_render_target(pipe, sf, &color, box->x, box->y, box->width, box->height); 1347 } 1348 } 1349 pipe_surface_reference(&sf, NULL); 1350} 1351 1352void si_init_clear_functions(struct si_context *sctx) 1353{ 1354 sctx->b.clear_render_target = si_clear_render_target; 1355 sctx->b.clear_texture = si_clear_texture; 1356 1357 if (sctx->has_graphics) { 1358 sctx->b.clear = si_clear; 1359 sctx->b.clear_depth_stencil = si_clear_depth_stencil; 1360 } 1361} 1362