1/*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25#include "si_pipe.h"
26#include "sid.h"
27#include "util/format/u_format.h"
28#include "util/u_pack_color.h"
29#include "util/u_surface.h"
30
/* Named combinations of SI_SAVE_* flags for the clear paths.
 * NOTE(review): presumably these select which context state gets saved and
 * restored around a clear; the consumer is outside this chunk -- confirm.
 */
enum
{
   /* Fragment state only. */
   SI_CLEAR = SI_SAVE_FRAGMENT_STATE,
   /* Fragment state plus the framebuffer state. */
   SI_CLEAR_SURFACE = SI_SAVE_FRAMEBUFFER | SI_SAVE_FRAGMENT_STATE,
};
36
37void si_init_buffer_clear(struct si_clear_info *info,
38                          struct pipe_resource *resource, uint64_t offset,
39                          uint32_t size, uint32_t clear_value)
40{
41   info->resource = resource;
42   info->offset = offset;
43   info->size = size;
44   info->clear_value = clear_value;
45   info->writemask = 0xffffffff;
46   info->is_dcc_msaa = false;
47}
48
49static void si_init_buffer_clear_rmw(struct si_clear_info *info,
50                                     struct pipe_resource *resource, uint64_t offset,
51                                     uint32_t size, uint32_t clear_value, uint32_t writemask)
52{
53   si_init_buffer_clear(info, resource, offset, size, clear_value);
54   info->writemask = writemask;
55}
56
57void si_execute_clears(struct si_context *sctx, struct si_clear_info *info,
58                       unsigned num_clears, unsigned types)
59{
60   if (!num_clears)
61      return;
62
63   /* Flush caches and wait for idle. */
64   if (types & (SI_CLEAR_TYPE_CMASK | SI_CLEAR_TYPE_DCC))
65      sctx->flags |= si_get_flush_flags(sctx, SI_COHERENCY_CB_META, L2_LRU);
66
67   if (types & SI_CLEAR_TYPE_HTILE)
68      sctx->flags |= si_get_flush_flags(sctx, SI_COHERENCY_DB_META, L2_LRU);
69
70   /* Flush caches in case we use compute. */
71   sctx->flags |= SI_CONTEXT_INV_VCACHE;
72
73   /* GFX6-8: CB and DB don't use L2. */
74   if (sctx->gfx_level <= GFX8)
75      sctx->flags |= SI_CONTEXT_INV_L2;
76
77   /* Execute clears. */
78   for (unsigned i = 0; i < num_clears; i++) {
79      if (info[i].is_dcc_msaa) {
80         gfx9_clear_dcc_msaa(sctx, info[i].resource, info[i].clear_value,
81                             SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP);
82         continue;
83      }
84
85      assert(info[i].size > 0);
86
87      if (info[i].writemask != 0xffffffff) {
88         si_compute_clear_buffer_rmw(sctx, info[i].resource, info[i].offset, info[i].size,
89                                     info[i].clear_value, info[i].writemask,
90                                     SI_OP_SKIP_CACHE_INV_BEFORE, SI_COHERENCY_CP);
91      } else {
92         /* Compute shaders are much faster on both dGPUs and APUs. Don't use CP DMA. */
93         si_clear_buffer(sctx, info[i].resource, info[i].offset, info[i].size,
94                         &info[i].clear_value, 4, SI_OP_SKIP_CACHE_INV_BEFORE,
95                         SI_COHERENCY_CP, SI_COMPUTE_CLEAR_METHOD);
96      }
97   }
98
99   /* Wait for idle. */
100   sctx->flags |= SI_CONTEXT_CS_PARTIAL_FLUSH;
101
102   /* GFX6-8: CB and DB don't use L2. */
103   if (sctx->gfx_level <= GFX8)
104      sctx->flags |= SI_CONTEXT_WB_L2;
105}
106
107static bool si_alloc_separate_cmask(struct si_screen *sscreen, struct si_texture *tex)
108{
109   assert(sscreen->info.gfx_level < GFX11);
110
111   /* CMASK for MSAA is allocated in advance or always disabled
112    * by "nofmask" option.
113    */
114   if (tex->cmask_buffer)
115      return true;
116
117   if (!tex->surface.cmask_size)
118      return false;
119
120   tex->cmask_buffer =
121      si_aligned_buffer_create(&sscreen->b, PIPE_RESOURCE_FLAG_UNMAPPABLE, PIPE_USAGE_DEFAULT,
122                               tex->surface.cmask_size, 1 << tex->surface.cmask_alignment_log2);
123   if (tex->cmask_buffer == NULL)
124      return false;
125
126   tex->cmask_base_address_reg = tex->cmask_buffer->gpu_address >> 8;
127   tex->cb_color_info |= S_028C70_FAST_CLEAR(1);
128
129   p_atomic_inc(&sscreen->compressed_colortex_counter);
130   return true;
131}
132
133static bool si_set_clear_color(struct si_texture *tex, enum pipe_format surface_format,
134                               const union pipe_color_union *color)
135{
136   union util_color uc;
137
138   memset(&uc, 0, sizeof(uc));
139
140   if (tex->surface.bpe == 16) {
141      /* DCC fast clear only:
142       *   CLEAR_WORD0 = R = G = B
143       *   CLEAR_WORD1 = A
144       */
145      assert(color->ui[0] == color->ui[1] && color->ui[0] == color->ui[2]);
146      uc.ui[0] = color->ui[0];
147      uc.ui[1] = color->ui[3];
148   } else {
149      if (tex->swap_rgb_to_bgr)
150         surface_format = util_format_rgb_to_bgr(surface_format);
151
152      util_pack_color_union(surface_format, &uc, color);
153   }
154
155   if (memcmp(tex->color_clear_value, &uc, 2 * sizeof(uint32_t)) == 0)
156      return false;
157
158   memcpy(tex->color_clear_value, &uc, 2 * sizeof(uint32_t));
159   return true;
160}
161
162/** Linearize and convert luminance/intensity to red. */
163enum pipe_format si_simplify_cb_format(enum pipe_format format)
164{
165   format = util_format_linear(format);
166   format = util_format_luminance_to_red(format);
167   return util_format_intensity_to_red(format);
168}
169
170bool vi_alpha_is_on_msb(struct si_screen *sscreen, enum pipe_format format)
171{
172   format = si_simplify_cb_format(format);
173   const struct util_format_description *desc = util_format_description(format);
174   unsigned comp_swap = si_translate_colorswap(sscreen->info.gfx_level, format, false);
175
176   /* The following code matches the hw behavior. */
177   if (desc->nr_channels == 1) {
178      return (comp_swap == V_028C70_SWAP_ALT_REV) != (sscreen->info.family == CHIP_RAVEN2 ||
179                                                      sscreen->info.family == CHIP_RENOIR);
180   }
181
182   return comp_swap != V_028C70_SWAP_STD_REV && comp_swap != V_028C70_SWAP_ALT_REV;
183}
184
185static bool gfx8_get_dcc_clear_parameters(struct si_screen *sscreen, enum pipe_format base_format,
186                                          enum pipe_format surface_format,
187                                          const union pipe_color_union *color, uint32_t *clear_value,
188                                          bool *eliminate_needed)
189{
190   /* If we want to clear without needing a fast clear eliminate step, we
191    * can set color and alpha independently to 0 or 1 (or 0/max for integer
192    * formats).
193    */
194   bool values[4] = {};      /* whether to clear to 0 or 1 */
195   bool color_value = false; /* clear color to 0 or 1 */
196   bool alpha_value = false; /* clear alpha to 0 or 1 */
197   int alpha_channel;        /* index of the alpha component */
198   bool has_color = false;
199   bool has_alpha = false;
200
201   const struct util_format_description *desc =
202      util_format_description(si_simplify_cb_format(surface_format));
203
204   /* 128-bit fast clear with different R,G,B values is unsupported. */
205   if (desc->block.bits == 128 && (color->ui[0] != color->ui[1] || color->ui[0] != color->ui[2]))
206      return false;
207
208   *eliminate_needed = true;
209   *clear_value = GFX8_DCC_CLEAR_REG;
210
211   if (desc->layout != UTIL_FORMAT_LAYOUT_PLAIN)
212      return true; /* need ELIMINATE_FAST_CLEAR */
213
214   bool base_alpha_is_on_msb = vi_alpha_is_on_msb(sscreen, base_format);
215   bool surf_alpha_is_on_msb = vi_alpha_is_on_msb(sscreen, surface_format);
216
217   /* Formats with 3 channels can't have alpha. */
218   if (desc->nr_channels == 3)
219      alpha_channel = -1;
220   else if (surf_alpha_is_on_msb)
221      alpha_channel = desc->nr_channels - 1;
222   else
223      alpha_channel = 0;
224
225   for (int i = 0; i < 4; ++i) {
226      if (desc->swizzle[i] >= PIPE_SWIZZLE_0)
227         continue;
228
229      if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
230         /* Use the maximum value for clamping the clear color. */
231         int max = u_bit_consecutive(0, desc->channel[i].size - 1);
232
233         values[i] = color->i[i] != 0;
234         if (color->i[i] != 0 && MIN2(color->i[i], max) != max)
235            return true; /* need ELIMINATE_FAST_CLEAR */
236      } else if (desc->channel[i].pure_integer &&
237                 desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
238         /* Use the maximum value for clamping the clear color. */
239         unsigned max = u_bit_consecutive(0, desc->channel[i].size);
240
241         values[i] = color->ui[i] != 0U;
242         if (color->ui[i] != 0U && MIN2(color->ui[i], max) != max)
243            return true; /* need ELIMINATE_FAST_CLEAR */
244      } else {
245         values[i] = color->f[i] != 0.0F;
246         if (color->f[i] != 0.0F && color->f[i] != 1.0F)
247            return true; /* need ELIMINATE_FAST_CLEAR */
248      }
249
250      if (desc->swizzle[i] == alpha_channel) {
251         alpha_value = values[i];
252         has_alpha = true;
253      } else {
254         color_value = values[i];
255         has_color = true;
256      }
257   }
258
259   /* If alpha isn't present, make it the same as color, and vice versa. */
260   if (!has_alpha)
261      alpha_value = color_value;
262   else if (!has_color)
263      color_value = alpha_value;
264
265   if (color_value != alpha_value && base_alpha_is_on_msb != surf_alpha_is_on_msb)
266      return true; /* require ELIMINATE_FAST_CLEAR */
267
268   /* Check if all color values are equal if they are present. */
269   for (int i = 0; i < 4; ++i) {
270      if (desc->swizzle[i] <= PIPE_SWIZZLE_W && desc->swizzle[i] != alpha_channel &&
271          values[i] != color_value)
272         return true; /* require ELIMINATE_FAST_CLEAR */
273   }
274
275   /* This doesn't need ELIMINATE_FAST_CLEAR.
276    * On chips predating Raven2, the DCC clear codes and the CB clear
277    * color registers must match.
278    */
279   *eliminate_needed = false;
280
281   if (color_value) {
282      if (alpha_value)
283         *clear_value = GFX8_DCC_CLEAR_1111;
284      else
285         *clear_value = GFX8_DCC_CLEAR_1110;
286   } else {
287      if (alpha_value)
288         *clear_value = GFX8_DCC_CLEAR_0001;
289      else
290         *clear_value = GFX8_DCC_CLEAR_0000;
291   }
292   return true;
293}
294
295static bool gfx11_get_dcc_clear_parameters(struct si_screen *sscreen, enum pipe_format surface_format,
296                                           const union pipe_color_union *color, uint32_t *clear_value)
297{
298   const struct util_format_description *desc =
299      util_format_description(si_simplify_cb_format(surface_format));
300   unsigned start_bit = UINT_MAX;
301   unsigned end_bit = 0;
302
303   /* TODO: 8bpp and 16bpp fast DCC clears don't work. */
304   if (desc->block.bits <= 16)
305      return false;
306
307   /* Find the used bit range. */
308   for (unsigned i = 0; i < 4; i++) {
309      unsigned swizzle = desc->swizzle[i];
310
311      if (swizzle >= PIPE_SWIZZLE_0)
312         continue;
313
314      start_bit = MIN2(start_bit, desc->channel[swizzle].shift);
315      end_bit = MAX2(end_bit, desc->channel[swizzle].shift + desc->channel[swizzle].size);
316   }
317
318   union {
319      uint8_t ub[16];
320      uint16_t us[8];
321      uint32_t ui[4];
322   } value = {};
323   util_pack_color_union(surface_format, (union util_color*)&value, color);
324
325   /* Check the cases where all components or bits are either all 0 or all 1. */
326   bool all_bits_are_0 = true;
327   bool all_bits_are_1 = true;
328   bool all_words_are_fp16_1 = false;
329   bool all_words_are_fp32_1 = false;
330
331   for (unsigned i = start_bit; i < end_bit; i++) {
332      bool bit = value.ub[i / 8] & BITFIELD_BIT(i % 8);
333
334      all_bits_are_0 &= !bit;
335      all_bits_are_1 &= bit;
336   }
337
338   if (start_bit % 16 == 0 && end_bit % 16 == 0) {
339      all_words_are_fp16_1 = true;
340      for (unsigned i = start_bit / 16; i < end_bit / 16; i++)
341         all_words_are_fp16_1 &= value.us[i] == 0x3c00;
342   }
343
344   if (start_bit % 32 == 0 && end_bit % 32 == 0) {
345      all_words_are_fp32_1 = true;
346      for (unsigned i = start_bit / 32; i < end_bit / 32; i++)
347         all_words_are_fp32_1 &= value.ui[i] == 0x3f800000;
348   }
349
350#if 0 /* debug code */
351   int i = util_format_get_first_non_void_channel(surface_format);
352   if (desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED && desc->channel[i].pure_integer) {
353      printf("%i %i %i %i\n", color->i[0], color->i[1], color->i[2], color->i[3]);
354   } else if (desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED && desc->channel[i].pure_integer) {
355      printf("%u %u %u %u\n", color->ui[0], color->ui[1], color->ui[2], color->ui[3]);
356   } else {
357      printf("%f %f %f %f\n", color->f[0], color->f[1], color->f[2], color->f[3]);
358   }
359   for (unsigned i = 0; i < end_bit / 8; i++)
360      printf("%02x", value.ub[i]);
361   printf("\n");
362   printf("bits=[%u..%u)%s%s%s%s\n", start_bit, end_bit,
363          all_bits_are_0 ? ", all 0" : "",
364          all_bits_are_1 ? ", all 1" : "",
365          all_words_are_fp16_1 ? ", all fp16 1" : "",
366          all_words_are_fp32_1 ? ", all fp32 1" : "");
367#endif
368
369   *clear_value = 0;
370
371   if (all_bits_are_0 || all_bits_are_1 || all_words_are_fp16_1 || all_words_are_fp32_1) {
372      if (all_bits_are_0)
373         *clear_value = GFX11_DCC_CLEAR_0000;
374      else if (all_bits_are_1)
375         *clear_value = GFX11_DCC_CLEAR_1111_UNORM;
376      else if (all_words_are_fp16_1)
377         *clear_value = GFX11_DCC_CLEAR_1111_FP16;
378      else if (all_words_are_fp32_1)
379         *clear_value = GFX11_DCC_CLEAR_1111_FP32;
380
381      return true;
382   }
383
384   /* Check 0001 and 1110 cases. */
385   if (vi_alpha_is_on_msb(sscreen, surface_format)) {
386      if (desc->nr_channels == 2 && desc->channel[0].size == 8) {
387         if (value.ub[0] == 0x00 && value.ub[1] == 0xff) {
388            *clear_value = GFX11_DCC_CLEAR_0001_UNORM;
389            return true;
390         } else if (value.ub[0] == 0xff && value.ub[1] == 0x00) {
391            *clear_value = GFX11_DCC_CLEAR_1110_UNORM;
392            return true;
393         }
394      } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) {
395         if (value.ub[0] == 0x00 && value.ub[1] == 0x00 &&
396             value.ub[2] == 0x00 && value.ub[3] == 0xff) {
397            *clear_value = GFX11_DCC_CLEAR_0001_UNORM;
398            return true;
399         } else if (value.ub[0] == 0xff && value.ub[1] == 0xff &&
400                    value.ub[2] == 0xff && value.ub[3] == 0x00) {
401            *clear_value = GFX11_DCC_CLEAR_1110_UNORM;
402            return true;
403         }
404      } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) {
405         if (value.us[0] == 0x0000 && value.us[1] == 0x0000 &&
406             value.us[2] == 0x0000 && value.us[3] == 0xffff) {
407            *clear_value = GFX11_DCC_CLEAR_0001_UNORM;
408            return true;
409         } else if (value.us[0] == 0xffff && value.us[1] == 0xffff &&
410                    value.us[2] == 0xffff && value.us[3] == 0x0000) {
411            *clear_value = GFX11_DCC_CLEAR_1110_UNORM;
412            return true;
413         }
414      }
415   }
416
417   return false;
418}
419
420bool vi_dcc_get_clear_info(struct si_context *sctx, struct si_texture *tex, unsigned level,
421                           unsigned clear_value, struct si_clear_info *out)
422{
423   struct pipe_resource *dcc_buffer = &tex->buffer.b.b;
424   uint64_t dcc_offset = tex->surface.meta_offset;
425   uint32_t clear_size;
426
427   assert(vi_dcc_enabled(tex, level));
428
429   if (sctx->gfx_level >= GFX10) {
430      /* 4x and 8x MSAA needs a sophisticated compute shader for
431       * the clear. GFX11 doesn't need that.
432       */
433      if (sctx->gfx_level < GFX11 && tex->buffer.b.b.nr_storage_samples >= 4)
434         return false;
435
436      unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
437
438      if (num_layers == 1) {
439         /* Clear a specific level. */
440         dcc_offset += tex->surface.u.gfx9.meta_levels[level].offset;
441         clear_size = tex->surface.u.gfx9.meta_levels[level].size;
442      } else if (tex->buffer.b.b.last_level == 0) {
443         /* Clear all layers having only 1 level. */
444         clear_size = tex->surface.meta_size;
445      } else {
446         /* Clearing DCC with both multiple levels and multiple layers is not
447          * implemented.
448          */
449         return false;
450      }
451   } else if (sctx->gfx_level == GFX9) {
452      /* TODO: Implement DCC fast clear for level 0 of mipmapped textures. Mipmapped
453       * DCC has to clear a rectangular area of DCC for level 0 (because the whole miptree
454       * is organized in a 2D plane).
455       */
456      if (tex->buffer.b.b.last_level > 0)
457         return false;
458
459      /* 4x and 8x MSAA need to clear only sample 0 and 1 in a compute shader and leave other
460       * samples untouched. (only the first 2 samples are compressed) */
461      if (tex->buffer.b.b.nr_storage_samples >= 4) {
462         si_init_buffer_clear(out, dcc_buffer, 0, 0, clear_value);
463         out->is_dcc_msaa = true;
464         return true;
465      }
466
467      clear_size = tex->surface.meta_size;
468   } else {
469      unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
470
471      /* If this is 0, fast clear isn't possible. (can occur with MSAA) */
472      if (!tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size)
473         return false;
474
475      /* Layered 4x and 8x MSAA DCC fast clears need to clear
476       * dcc_fast_clear_size bytes for each layer. A compute shader
477       * would be more efficient than separate per-layer clear operations.
478       */
479      if (tex->buffer.b.b.nr_storage_samples >= 4 && num_layers > 1)
480         return false;
481
482      dcc_offset += tex->surface.u.legacy.color.dcc_level[level].dcc_offset;
483      clear_size = tex->surface.u.legacy.color.dcc_level[level].dcc_fast_clear_size;
484   }
485
486   si_init_buffer_clear(out, dcc_buffer, dcc_offset, clear_size, clear_value);
487   return true;
488}
489
490/* Set the same micro tile mode as the destination of the last MSAA resolve.
491 * This allows hitting the MSAA resolve fast path, which requires that both
492 * src and dst micro tile modes match.
493 */
494static void si_set_optimal_micro_tile_mode(struct si_screen *sscreen, struct si_texture *tex)
495{
496   if (sscreen->info.gfx_level >= GFX10 || tex->buffer.b.is_shared ||
497       tex->buffer.b.b.nr_samples <= 1 ||
498       tex->surface.micro_tile_mode == tex->last_msaa_resolve_target_micro_mode)
499      return;
500
501   assert(sscreen->info.gfx_level >= GFX9 ||
502          tex->surface.u.legacy.level[0].mode == RADEON_SURF_MODE_2D);
503   assert(tex->buffer.b.b.last_level == 0);
504
505   if (sscreen->info.gfx_level >= GFX9) {
506      /* 4K or larger tiles only. 0 is linear. 1-3 are 256B tiles. */
507      assert(tex->surface.u.gfx9.swizzle_mode >= 4);
508
509      /* If you do swizzle_mode % 4, you'll get:
510       *   0 = Depth
511       *   1 = Standard,
512       *   2 = Displayable
513       *   3 = Rotated
514       *
515       * Depth-sample order isn't allowed:
516       */
517      assert(tex->surface.u.gfx9.swizzle_mode % 4 != 0);
518
519      switch (tex->last_msaa_resolve_target_micro_mode) {
520      case RADEON_MICRO_MODE_DISPLAY:
521         tex->surface.u.gfx9.swizzle_mode &= ~0x3;
522         tex->surface.u.gfx9.swizzle_mode += 2; /* D */
523         break;
524      case RADEON_MICRO_MODE_STANDARD:
525         tex->surface.u.gfx9.swizzle_mode &= ~0x3;
526         tex->surface.u.gfx9.swizzle_mode += 1; /* S */
527         break;
528      case RADEON_MICRO_MODE_RENDER:
529         tex->surface.u.gfx9.swizzle_mode &= ~0x3;
530         tex->surface.u.gfx9.swizzle_mode += 3; /* R */
531         break;
532      default: /* depth */
533         assert(!"unexpected micro mode");
534         return;
535      }
536   } else if (sscreen->info.gfx_level >= GFX7) {
537      /* These magic numbers were copied from addrlib. It doesn't use
538       * any definitions for them either. They are all 2D_TILED_THIN1
539       * modes with different bpp and micro tile mode.
540       */
541      switch (tex->last_msaa_resolve_target_micro_mode) {
542      case RADEON_MICRO_MODE_DISPLAY:
543         tex->surface.u.legacy.tiling_index[0] = 10;
544         break;
545      case RADEON_MICRO_MODE_STANDARD:
546         tex->surface.u.legacy.tiling_index[0] = 14;
547         break;
548      case RADEON_MICRO_MODE_RENDER:
549         tex->surface.u.legacy.tiling_index[0] = 28;
550         break;
551      default: /* depth, thick */
552         assert(!"unexpected micro mode");
553         return;
554      }
555   } else { /* GFX6 */
556      switch (tex->last_msaa_resolve_target_micro_mode) {
557      case RADEON_MICRO_MODE_DISPLAY:
558         switch (tex->surface.bpe) {
559         case 1:
560            tex->surface.u.legacy.tiling_index[0] = 10;
561            break;
562         case 2:
563            tex->surface.u.legacy.tiling_index[0] = 11;
564            break;
565         default: /* 4, 8 */
566            tex->surface.u.legacy.tiling_index[0] = 12;
567            break;
568         }
569         break;
570      case RADEON_MICRO_MODE_STANDARD:
571         switch (tex->surface.bpe) {
572         case 1:
573            tex->surface.u.legacy.tiling_index[0] = 14;
574            break;
575         case 2:
576            tex->surface.u.legacy.tiling_index[0] = 15;
577            break;
578         case 4:
579            tex->surface.u.legacy.tiling_index[0] = 16;
580            break;
581         default: /* 8, 16 */
582            tex->surface.u.legacy.tiling_index[0] = 17;
583            break;
584         }
585         break;
586      default: /* depth, thick */
587         assert(!"unexpected micro mode");
588         return;
589      }
590   }
591
592   tex->surface.micro_tile_mode = tex->last_msaa_resolve_target_micro_mode;
593
594   p_atomic_inc(&sscreen->dirty_tex_counter);
595}
596
597static uint32_t si_get_htile_clear_value(struct si_texture *tex, float depth)
598{
599   /* Maximum 14-bit UINT value. */
600   const uint32_t max_z_value = 0x3FFF;
601
602   /* For clears, Zmask and Smem will always be set to zero. */
603   const uint32_t zmask = 0;
604   const uint32_t smem  = 0;
605
606   /* Convert depthValue to 14-bit zmin/zmax uint values. */
607   const uint32_t zmin = lroundf(depth * max_z_value);
608   const uint32_t zmax = zmin;
609
610   if (tex->htile_stencil_disabled) {
611      /* Z-only HTILE is laid out as follows:
612       * |31     18|17      4|3     0|
613       * +---------+---------+-------+
614       * |  Max Z  |  Min Z  | ZMask |
615       */
616      return ((zmax & 0x3FFF) << 18) |
617             ((zmin & 0x3FFF) << 4) |
618             ((zmask & 0xF) << 0);
619   } else {
620      /* Z+S HTILE is laid out as-follows:
621       * |31       12|11 10|9    8|7   6|5   4|3     0|
622       * +-----------+-----+------+-----+-----+-------+
623       * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
624       *
625       * The base value for zRange is either zMax or zMin, depending on ZRANGE_PRECISION.
626       * For a fast clear, zMin == zMax == clearValue. This means that the base will
627       * always be the clear value (converted to 14-bit UINT).
628       *
629       * When abs(zMax-zMin) < 16, the delta is equal to the difference. In the case of
630       * fast clears, where zMax == zMin, the delta is always zero.
631       */
632      const uint32_t delta = 0;
633      const uint32_t zrange = (zmax << 6) | delta;
634
635      /* SResults 0 & 1 are set based on the stencil compare state.
636       * For fast-clear, the default value of sr0 and sr1 are both 0x3.
637       */
638      const uint32_t sresults = 0xf;
639
640      return ((zrange & 0xFFFFF) << 12) |
641             ((smem & 0x3) <<  8) |
642             ((sresults & 0xF) <<  4) |
643             ((zmask & 0xF) <<  0);
644   }
645}
646
647static bool si_can_fast_clear_depth(struct si_texture *zstex, unsigned level, float depth,
648                                    unsigned buffers)
649{
650   /* TC-compatible HTILE only supports depth clears to 0 or 1. */
651   return buffers & PIPE_CLEAR_DEPTH &&
652          si_htile_enabled(zstex, level, PIPE_MASK_Z) &&
653          (!zstex->tc_compatible_htile || depth == 0 || depth == 1);
654}
655
656static bool si_can_fast_clear_stencil(struct si_texture *zstex, unsigned level, uint8_t stencil,
657                                      unsigned buffers)
658{
659   /* TC-compatible HTILE only supports stencil clears to 0. */
660   return buffers & PIPE_CLEAR_STENCIL &&
661          si_htile_enabled(zstex, level, PIPE_MASK_S) &&
662          (!zstex->tc_compatible_htile || stencil == 0);
663}
664
665static void si_fast_clear(struct si_context *sctx, unsigned *buffers,
666                          const union pipe_color_union *color, float depth, uint8_t stencil)
667{
668   struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
669   struct si_clear_info info[8 * 2 + 1]; /* MRTs * (CMASK + DCC) + ZS */
670   unsigned num_clears = 0;
671   unsigned clear_types = 0;
672   unsigned num_pixels = fb->width * fb->height;
673
674   /* This function is broken in BE, so just disable this path for now */
675#if UTIL_ARCH_BIG_ENDIAN
676   return;
677#endif
678
679   if (sctx->render_cond)
680      return;
681
682   /* Gather information about what to clear. */
683   unsigned color_buffer_mask = (*buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);
684   while (color_buffer_mask) {
685      unsigned i = u_bit_scan(&color_buffer_mask);
686
687      struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture;
688      unsigned level = fb->cbufs[i]->u.tex.level;
689      unsigned num_layers = util_num_layers(&tex->buffer.b.b, level);
690
691      /* the clear is allowed if all layers are bound */
692      if (fb->cbufs[i]->u.tex.first_layer != 0 ||
693          fb->cbufs[i]->u.tex.last_layer != num_layers - 1) {
694         continue;
695      }
696
697      /* We can change the micro tile mode before a full clear. */
698      /* This is only used for MSAA textures when clearing all layers. */
699      si_set_optimal_micro_tile_mode(sctx->screen, tex);
700
701      if (tex->swap_rgb_to_bgr_on_next_clear) {
702         assert(!tex->swap_rgb_to_bgr);
703         assert(tex->buffer.b.b.nr_samples >= 2);
704         tex->swap_rgb_to_bgr = true;
705         tex->swap_rgb_to_bgr_on_next_clear = false;
706
707         /* Update all sampler views and images. */
708         p_atomic_inc(&sctx->screen->dirty_tex_counter);
709      }
710
711      /* only supported on tiled surfaces */
712      if (tex->surface.is_linear) {
713         continue;
714      }
715
716      /* Use a slow clear for small surfaces where the cost of
717       * the eliminate pass can be higher than the benefit of fast
718       * clear. The closed driver does this, but the numbers may differ.
719       *
720       * This helps on both dGPUs and APUs, even small APUs like Mullins.
721       */
722      bool fb_too_small = num_pixels * num_layers <= 512 * 512;
723      bool too_small = tex->buffer.b.b.nr_samples <= 1 && fb_too_small;
724      bool eliminate_needed = false;
725      bool fmask_decompress_needed = false;
726
727      /* Try to clear DCC first, otherwise try CMASK. */
728      if (vi_dcc_enabled(tex, level)) {
729         uint32_t reset_value;
730
731         if (sctx->screen->debug_flags & DBG(NO_DCC_CLEAR))
732            continue;
733
734         if (sctx->gfx_level >= GFX11) {
735            if (!gfx11_get_dcc_clear_parameters(sctx->screen, fb->cbufs[i]->format, color,
736                                                &reset_value))
737               continue;
738         } else {
739            if (!gfx8_get_dcc_clear_parameters(sctx->screen, tex->buffer.b.b.format,
740                                               fb->cbufs[i]->format, color, &reset_value,
741                                               &eliminate_needed))
742               continue;
743         }
744
745         /* Shared textures can't use fast clear without an explicit flush
746          * because the clear color is not exported.
747          *
748          * Chips without DCC constant encoding must set the clear color registers
749          * correctly even if the fast clear eliminate pass is not needed.
750          */
751         if ((eliminate_needed || !sctx->screen->info.has_dcc_constant_encode) &&
752             tex->buffer.b.is_shared &&
753             !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
754            continue;
755
756         if (eliminate_needed && too_small)
757            continue;
758
759         /* We can clear any level, but we only set up the clear value registers for the first
760          * level. Therefore, all other levels can be cleared only if the clear value registers
761          * are not used, which is only the case with DCC constant encoding and 0/1 clear values.
762          */
763         if (level > 0 && (eliminate_needed || !sctx->screen->info.has_dcc_constant_encode))
764            continue;
765
766         if (tex->buffer.b.b.nr_samples >= 2 && eliminate_needed &&
767             !sctx->screen->allow_dcc_msaa_clear_to_reg_for_bpp[util_logbase2(tex->surface.bpe)])
768            continue;
769
770         assert(num_clears < ARRAY_SIZE(info));
771
772         if (!vi_dcc_get_clear_info(sctx, tex, level, reset_value, &info[num_clears]))
773            continue;
774
775         num_clears++;
776         clear_types |= SI_CLEAR_TYPE_DCC;
777
778         si_mark_display_dcc_dirty(sctx, tex);
779
780         /* DCC fast clear with MSAA should clear CMASK to 0xC. */
781         if (tex->buffer.b.b.nr_samples >= 2 && tex->cmask_buffer) {
782            assert(sctx->gfx_level < GFX11); /* no FMASK/CMASK on GFX11 */
783            assert(num_clears < ARRAY_SIZE(info));
784            si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b,
785                                 tex->surface.cmask_offset, tex->surface.cmask_size, 0xCCCCCCCC);
786            clear_types |= SI_CLEAR_TYPE_CMASK;
787            fmask_decompress_needed = true;
788         }
789      } else {
790         /* No CMASK on GFX11. */
791         if (sctx->gfx_level >= GFX11)
792            continue;
793
794         if (level > 0)
795            continue;
796
797         /* Shared textures can't use fast clear without an explicit flush
798          * because the clear color is not exported.
799          */
800         if (tex->buffer.b.is_shared &&
801             !(tex->buffer.external_usage & PIPE_HANDLE_USAGE_EXPLICIT_FLUSH))
802            continue;
803
804         if (too_small)
805            continue;
806
807         /* 128-bit formats are unsupported */
808         if (tex->surface.bpe > 8) {
809            continue;
810         }
811
812         /* RB+ doesn't work with CMASK fast clear on Stoney. */
813         if (sctx->family == CHIP_STONEY)
814            continue;
815
816         /* Disable fast clear if tex is encrypted */
817         if (tex->buffer.flags & RADEON_FLAG_ENCRYPTED)
818            continue;
819
820         uint64_t cmask_offset = 0;
821         unsigned clear_size = 0;
822
823         if (sctx->gfx_level >= GFX10) {
824            assert(level == 0);
825
826            /* Clearing CMASK with both multiple levels and multiple layers is not
827             * implemented.
828             */
829            if (num_layers > 1 && tex->buffer.b.b.last_level > 0)
830               continue;
831
832            if (!si_alloc_separate_cmask(sctx->screen, tex))
833               continue;
834
835            if (num_layers == 1) {
836               /* Clear level 0. */
837               cmask_offset = tex->surface.cmask_offset + tex->surface.u.gfx9.color.cmask_level0.offset;
838               clear_size = tex->surface.u.gfx9.color.cmask_level0.size;
839            } else if (tex->buffer.b.b.last_level == 0) {
840               /* Clear all layers having only 1 level. */
841               cmask_offset = tex->surface.cmask_offset;
842               clear_size = tex->surface.cmask_size;
843            } else {
844               assert(0); /* this is prevented above */
845            }
846         } else if (sctx->gfx_level == GFX9) {
847            /* TODO: Implement CMASK fast clear for level 0 of mipmapped textures. Mipmapped
848             * CMASK has to clear a rectangular area of CMASK for level 0 (because the whole
849             * miptree is organized in a 2D plane).
850             */
851            if (tex->buffer.b.b.last_level > 0)
852               continue;
853
854            if (!si_alloc_separate_cmask(sctx->screen, tex))
855               continue;
856
857            cmask_offset = tex->surface.cmask_offset;
858            clear_size = tex->surface.cmask_size;
859         } else {
860            if (!si_alloc_separate_cmask(sctx->screen, tex))
861               continue;
862
863            /* GFX6-8: This only covers mipmap level 0. */
864            cmask_offset = tex->surface.cmask_offset;
865            clear_size = tex->surface.cmask_size;
866         }
867
868         /* Do the fast clear. */
869         assert(num_clears < ARRAY_SIZE(info));
870         si_init_buffer_clear(&info[num_clears++], &tex->cmask_buffer->b.b,
871                              cmask_offset, clear_size, 0);
872         clear_types |= SI_CLEAR_TYPE_CMASK;
873         eliminate_needed = true;
874      }
875
876      if ((eliminate_needed || fmask_decompress_needed) &&
877          !(tex->dirty_level_mask & (1 << level))) {
878         assert(sctx->gfx_level < GFX11); /* no decompression needed on GFX11 */
879         tex->dirty_level_mask |= 1 << level;
880         si_set_sampler_depth_decompress_mask(sctx, tex);
881         p_atomic_inc(&sctx->screen->compressed_colortex_counter);
882      }
883
884      *buffers &= ~(PIPE_CLEAR_COLOR0 << i);
885
886      /* Chips with DCC constant encoding don't need to set the clear
887       * color registers for DCC clear values 0 and 1.
888       */
889      if (sctx->screen->info.has_dcc_constant_encode && !eliminate_needed)
890         continue;
891
892      /* There are no clear color registers on GFX11. */
893      assert(sctx->gfx_level < GFX11);
894
895      if (si_set_clear_color(tex, fb->cbufs[i]->format, color)) {
896         sctx->framebuffer.dirty_cbufs |= 1 << i;
897         si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
898      }
899   }
900
901   /* Depth/stencil clears. */
902   struct pipe_surface *zsbuf = fb->zsbuf;
903   struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL;
904   unsigned zs_num_layers = zstex ? util_num_layers(&zstex->buffer.b.b, zsbuf->u.tex.level) : 0;
905
906   if (zstex && zsbuf->u.tex.first_layer == 0 &&
907       zsbuf->u.tex.last_layer == zs_num_layers - 1 &&
908       si_htile_enabled(zstex, zsbuf->u.tex.level, PIPE_MASK_ZS)) {
909      unsigned level = zsbuf->u.tex.level;
910      bool update_db_depth_clear = false;
911      bool update_db_stencil_clear = false;
912      bool fb_too_small = num_pixels * zs_num_layers <= 512 * 512;
913
914      /* Transition from TC-incompatible to TC-compatible HTILE if requested. */
915      if (zstex->enable_tc_compatible_htile_next_clear) {
916          /* If both depth and stencil are present, they must be cleared together. */
917         if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL ||
918             (*buffers & PIPE_CLEAR_DEPTH && (!zstex->surface.has_stencil ||
919                                              zstex->htile_stencil_disabled))) {
920            /* The conversion from TC-incompatible to TC-compatible can only be done in one clear. */
921            assert(zstex->buffer.b.b.last_level == 0);
922            assert(!zstex->tc_compatible_htile);
923
924            /* Enable TC-compatible HTILE. */
925            zstex->enable_tc_compatible_htile_next_clear = false;
926            zstex->tc_compatible_htile = true;
927
928            /* Update the framebuffer state to reflect the change. */
929            sctx->framebuffer.DB_has_shader_readable_metadata = true;
930            sctx->framebuffer.dirty_zsbuf = true;
931            si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
932
933            /* Update all sampler views and shader images in all contexts. */
934            p_atomic_inc(&sctx->screen->dirty_tex_counter);
935
936            /* Perform the clear here if possible, else clear to uncompressed. */
937            uint32_t clear_value;
938
939            if (zstex->htile_stencil_disabled || !zstex->surface.has_stencil) {
940               if (si_can_fast_clear_depth(zstex, level, depth, *buffers)) {
941                  /* Z-only clear. */
942                  clear_value = si_get_htile_clear_value(zstex, depth);
943                  *buffers &= ~PIPE_CLEAR_DEPTH;
944                  zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
945                  zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
946                  update_db_depth_clear = true;
947               }
948            } else if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
949               if (si_can_fast_clear_depth(zstex, level, depth, *buffers) &&
950                   si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {
951                  /* Combined Z+S clear. */
952                  clear_value = si_get_htile_clear_value(zstex, depth);
953                  *buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
954                  zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
955                  zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
956                  zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level);
957                  update_db_depth_clear = true;
958                  update_db_stencil_clear = true;
959               }
960            }
961
962            if (!update_db_depth_clear) {
963               /* Clear to uncompressed, so that it doesn't contain values incompatible
964                * with the new TC-compatible HTILE setting.
965                *
966                * 0xfffff30f = uncompressed Z + S
967                * 0xfffc000f = uncompressed Z only
968                */
969               clear_value = !zstex->htile_stencil_disabled ? 0xfffff30f : 0xfffc000f;
970            }
971
972            zstex->need_flush_after_depth_decompression = sctx->gfx_level == GFX10_3;
973
974            assert(num_clears < ARRAY_SIZE(info));
975            si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b,
976                                 zstex->surface.meta_offset, zstex->surface.meta_size, clear_value);
977            clear_types |= SI_CLEAR_TYPE_HTILE;
978         }
979      } else if (num_clears || !fb_too_small) {
980         /* This is where the HTILE buffer clear is done.
981          *
982          * If there is no clear scheduled and the framebuffer size is too small, we should use
983          * the draw-based clear that is without waits. If there is some other clear scheduled,
984          * we will have to wait anyway, so add the HTILE buffer clear to the batch here.
985          * If the framebuffer size is large enough, use this codepath too.
986          */
987         uint64_t htile_offset = zstex->surface.meta_offset;
988         unsigned htile_size = 0;
989
990         /* Determine the HTILE subset to clear. */
991         if (sctx->gfx_level >= GFX10) {
992            /* This can only clear a layered texture with 1 level or a mipmap texture
993             * with 1 layer. Other cases are unimplemented.
994             */
995            if (zs_num_layers == 1) {
996               /* Clear a specific level. */
997               htile_offset += zstex->surface.u.gfx9.meta_levels[level].offset;
998               htile_size = zstex->surface.u.gfx9.meta_levels[level].size;
999            } else if (zstex->buffer.b.b.last_level == 0) {
1000               /* Clear all layers having only 1 level. */
1001               htile_size = zstex->surface.meta_size;
1002            }
1003         } else {
1004            /* This can only clear a layered texture with 1 level. Other cases are
1005             * unimplemented.
1006             */
1007            if (zstex->buffer.b.b.last_level == 0)
1008               htile_size = zstex->surface.meta_size;
1009         }
1010
1011         /* Perform the clear if it's possible. */
1012         if (zstex->htile_stencil_disabled || !zstex->surface.has_stencil) {
1013            if (htile_size &&
1014                si_can_fast_clear_depth(zstex, level, depth, *buffers)) {
1015               /* Z-only clear. */
1016               assert(num_clears < ARRAY_SIZE(info));
1017               si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1018                                    htile_size, si_get_htile_clear_value(zstex, depth));
1019               clear_types |= SI_CLEAR_TYPE_HTILE;
1020               *buffers &= ~PIPE_CLEAR_DEPTH;
1021               zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
1022               zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
1023               update_db_depth_clear = true;
1024            }
1025         } else if ((*buffers & PIPE_CLEAR_DEPTHSTENCIL) == PIPE_CLEAR_DEPTHSTENCIL) {
1026            if (htile_size &&
1027                si_can_fast_clear_depth(zstex, level, depth, *buffers) &&
1028                si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {
1029               /* Combined Z+S clear. */
1030               assert(num_clears < ARRAY_SIZE(info));
1031               si_init_buffer_clear(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1032                                    htile_size, si_get_htile_clear_value(zstex, depth));
1033               clear_types |= SI_CLEAR_TYPE_HTILE;
1034               *buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
1035               zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
1036               zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
1037               zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level);
1038               update_db_depth_clear = true;
1039               update_db_stencil_clear = true;
1040            }
1041         } else {
1042            /* Z-only or S-only clear when both Z/S are present using a read-modify-write
1043             * compute shader.
1044             *
1045             * If we get both clears but only one of them can be fast-cleared, we use
1046             * the draw-based fast clear to do both at the same time.
1047             */
1048            const uint32_t htile_depth_writemask = 0xfffffc0f;
1049            const uint32_t htile_stencil_writemask = 0x000003f0;
1050
1051            if (htile_size &&
1052                !(*buffers & PIPE_CLEAR_STENCIL) &&
1053                si_can_fast_clear_depth(zstex, level, depth, *buffers)) {
1054               /* Z-only clear with stencil left intact. */
1055               assert(num_clears < ARRAY_SIZE(info));
1056               si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1057                                        htile_size, si_get_htile_clear_value(zstex, depth),
1058                                        htile_depth_writemask);
1059               clear_types |= SI_CLEAR_TYPE_HTILE;
1060               *buffers &= ~PIPE_CLEAR_DEPTH;
1061               zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(level);
1062               zstex->depth_cleared_level_mask |= BITFIELD_BIT(level);
1063               update_db_depth_clear = true;
1064            } else if (htile_size &&
1065                       !(*buffers & PIPE_CLEAR_DEPTH) &&
1066                       si_can_fast_clear_stencil(zstex, level, stencil, *buffers)) {
1067               /* Stencil-only clear with depth left intact. */
1068               assert(num_clears < ARRAY_SIZE(info));
1069               si_init_buffer_clear_rmw(&info[num_clears++], &zstex->buffer.b.b, htile_offset,
1070                                        htile_size, si_get_htile_clear_value(zstex, depth),
1071                                        htile_stencil_writemask);
1072               clear_types |= SI_CLEAR_TYPE_HTILE;
1073               *buffers &= ~PIPE_CLEAR_STENCIL;
1074               zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(level);
1075               update_db_stencil_clear = true;
1076            }
1077         }
1078
1079         zstex->need_flush_after_depth_decompression = update_db_depth_clear && sctx->gfx_level == GFX10_3;
1080
1081         /* Update DB_DEPTH_CLEAR. */
1082         if (update_db_depth_clear &&
1083             zstex->depth_clear_value[level] != (float)depth) {
1084            zstex->depth_clear_value[level] = depth;
1085            sctx->framebuffer.dirty_zsbuf = true;
1086            si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1087         }
1088
1089         /* Update DB_STENCIL_CLEAR. */
1090         if (update_db_stencil_clear &&
1091             zstex->stencil_clear_value[level] != stencil) {
1092            zstex->stencil_clear_value[level] = stencil;
1093            sctx->framebuffer.dirty_zsbuf = true;
1094            si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1095         }
1096      }
1097   }
1098
1099   si_execute_clears(sctx, info, num_clears, clear_types);
1100}
1101
1102static void si_clear(struct pipe_context *ctx, unsigned buffers,
1103                     const struct pipe_scissor_state *scissor_state,
1104                     const union pipe_color_union *color, double depth, unsigned stencil)
1105{
1106   struct si_context *sctx = (struct si_context *)ctx;
1107   struct pipe_framebuffer_state *fb = &sctx->framebuffer.state;
1108   struct pipe_surface *zsbuf = fb->zsbuf;
1109   struct si_texture *zstex = zsbuf ? (struct si_texture *)zsbuf->texture : NULL;
1110   bool needs_db_flush = false;
1111
1112   /* Unset clear flags for non-existent buffers. */
1113   for (unsigned i = 0; i < 8; i++) {
1114      if (i >= fb->nr_cbufs || !fb->cbufs[i])
1115         buffers &= ~(PIPE_CLEAR_COLOR0 << i);
1116   }
1117   if (!zsbuf)
1118      buffers &= ~PIPE_CLEAR_DEPTHSTENCIL;
1119   else if (!util_format_has_stencil(util_format_description(zsbuf->format)))
1120      buffers &= ~PIPE_CLEAR_STENCIL;
1121
1122   si_fast_clear(sctx, &buffers, color, depth, stencil);
1123   if (!buffers)
1124      return; /* all buffers have been cleared */
1125
1126   if (buffers & PIPE_CLEAR_COLOR) {
1127      /* These buffers cannot use fast clear, make sure to disable expansion. */
1128      unsigned color_buffer_mask = (buffers & PIPE_CLEAR_COLOR) >> util_logbase2(PIPE_CLEAR_COLOR0);
1129      while (color_buffer_mask) {
1130         unsigned i = u_bit_scan(&color_buffer_mask);
1131         struct si_texture *tex = (struct si_texture *)fb->cbufs[i]->texture;
1132         if (tex->surface.fmask_size == 0)
1133            tex->dirty_level_mask &= ~(1 << fb->cbufs[i]->u.tex.level);
1134      }
1135   }
1136
1137   if (zstex && zsbuf->u.tex.first_layer == 0 &&
1138       zsbuf->u.tex.last_layer == util_max_layer(&zstex->buffer.b.b, 0)) {
1139      unsigned level = zsbuf->u.tex.level;
1140
1141      if (si_can_fast_clear_depth(zstex, level, depth, buffers)) {
1142         /* Need to disable EXPCLEAR temporarily if clearing
1143          * to a new value. */
1144         if (!(zstex->depth_cleared_level_mask_once & BITFIELD_BIT(level)) ||
1145             zstex->depth_clear_value[level] != depth) {
1146            sctx->db_depth_disable_expclear = true;
1147         }
1148
1149         if (zstex->depth_clear_value[level] != (float)depth) {
1150            if ((zstex->depth_clear_value[level] != 0) != (depth != 0)) {
1151               /* ZRANGE_PRECISION register of a bound surface will change so we
1152                * must flush the DB caches. */
1153               needs_db_flush = true;
1154            }
1155            /* Update DB_DEPTH_CLEAR. */
1156            zstex->depth_clear_value[level] = depth;
1157            sctx->framebuffer.dirty_zsbuf = true;
1158            si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1159         }
1160         sctx->db_depth_clear = true;
1161         si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1162      }
1163
1164      if (si_can_fast_clear_stencil(zstex, level, stencil, buffers)) {
1165         stencil &= 0xff;
1166
1167         /* Need to disable EXPCLEAR temporarily if clearing
1168          * to a new value. */
1169         if (!(zstex->stencil_cleared_level_mask_once & BITFIELD_BIT(level)) ||
1170             zstex->stencil_clear_value[level] != stencil) {
1171            sctx->db_stencil_disable_expclear = true;
1172         }
1173
1174         if (zstex->stencil_clear_value[level] != (uint8_t)stencil) {
1175            /* Update DB_STENCIL_CLEAR. */
1176            zstex->stencil_clear_value[level] = stencil;
1177            sctx->framebuffer.dirty_zsbuf = true;
1178            si_mark_atom_dirty(sctx, &sctx->atoms.s.framebuffer);
1179         }
1180         sctx->db_stencil_clear = true;
1181         si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1182      }
1183
1184      if (needs_db_flush)
1185         sctx->flags |= SI_CONTEXT_FLUSH_AND_INV_DB;
1186   }
1187
1188   if (unlikely(sctx->thread_trace_enabled)) {
1189      if (buffers & PIPE_CLEAR_COLOR)
1190         sctx->sqtt_next_event = EventCmdClearColorImage;
1191      else if (buffers & PIPE_CLEAR_DEPTHSTENCIL)
1192         sctx->sqtt_next_event = EventCmdClearDepthStencilImage;
1193   }
1194
1195   si_blitter_begin(sctx, SI_CLEAR);
1196   util_blitter_clear(sctx->blitter, fb->width, fb->height, util_framebuffer_get_num_layers(fb),
1197                      buffers, color, depth, stencil, sctx->framebuffer.nr_samples > 1);
1198   si_blitter_end(sctx);
1199
1200   if (sctx->db_depth_clear) {
1201      sctx->db_depth_clear = false;
1202      sctx->db_depth_disable_expclear = false;
1203      zstex->depth_cleared_level_mask_once |= BITFIELD_BIT(zsbuf->u.tex.level);
1204      zstex->depth_cleared_level_mask |= BITFIELD_BIT(zsbuf->u.tex.level);
1205      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1206   }
1207
1208   if (sctx->db_stencil_clear) {
1209      sctx->db_stencil_clear = false;
1210      sctx->db_stencil_disable_expclear = false;
1211      zstex->stencil_cleared_level_mask_once |= BITFIELD_BIT(zsbuf->u.tex.level);
1212      si_mark_atom_dirty(sctx, &sctx->atoms.s.db_render_state);
1213   }
1214}
1215
1216static bool si_try_normal_clear(struct si_context *sctx, struct pipe_surface *dst,
1217                                unsigned dstx, unsigned dsty, unsigned width, unsigned height,
1218                                bool render_condition_enabled, unsigned buffers,
1219                                const union pipe_color_union *color,
1220                                float depth, unsigned stencil)
1221{
1222   /* This is worth it only if it's a whole image clear, so that we just clear DCC/HTILE. */
1223   if (dstx == 0 && dsty == 0 &&
1224       width == dst->width &&
1225       height == dst->height &&
1226       dst->u.tex.first_layer == 0 &&
1227       dst->u.tex.last_layer == util_max_layer(dst->texture, dst->u.tex.level) &&
1228       /* pipe->clear honors render_condition, so only use it if it's unset or if it's set and enabled. */
1229       (!sctx->render_cond || render_condition_enabled) &&
1230       sctx->has_graphics) {
1231      struct pipe_context *ctx = &sctx->b;
1232      struct pipe_framebuffer_state saved_fb = {}, fb = {};
1233
1234      util_copy_framebuffer_state(&saved_fb, &sctx->framebuffer.state);
1235
1236      if (buffers & PIPE_CLEAR_COLOR) {
1237         fb.cbufs[0] = dst;
1238         fb.nr_cbufs = 1;
1239      } else {
1240         fb.zsbuf = dst;
1241      }
1242
1243      fb.width = dst->width;
1244      fb.height = dst->height;
1245
1246      ctx->set_framebuffer_state(ctx, &fb);
1247      ctx->clear(ctx, buffers, NULL, color, depth, stencil);
1248      ctx->set_framebuffer_state(ctx, &saved_fb);
1249
1250      util_copy_framebuffer_state(&saved_fb, NULL);
1251
1252      return true;
1253   }
1254
1255   return false;
1256}
1257
1258static void si_clear_render_target(struct pipe_context *ctx, struct pipe_surface *dst,
1259                                   const union pipe_color_union *color, unsigned dstx,
1260                                   unsigned dsty, unsigned width, unsigned height,
1261                                   bool render_condition_enabled)
1262{
1263   struct si_context *sctx = (struct si_context *)ctx;
1264   struct si_texture *sdst = (struct si_texture *)dst->texture;
1265
1266   /* Fast path that just clears DCC. */
1267   if (si_try_normal_clear(sctx, dst, dstx, dsty, width, height, render_condition_enabled,
1268                           PIPE_CLEAR_COLOR0, color, 0, 0))
1269      return;
1270
1271   if (dst->texture->nr_samples <= 1 &&
1272       (sctx->gfx_level >= GFX10 || !vi_dcc_enabled(sdst, dst->u.tex.level))) {
1273      si_compute_clear_render_target(ctx, dst, color, dstx, dsty, width, height,
1274                                     render_condition_enabled);
1275      return;
1276   }
1277
1278   si_blitter_begin(sctx,
1279                    SI_CLEAR_SURFACE | (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
1280   util_blitter_clear_render_target(sctx->blitter, dst, color, dstx, dsty, width, height);
1281   si_blitter_end(sctx);
1282}
1283
1284static void si_clear_depth_stencil(struct pipe_context *ctx, struct pipe_surface *dst,
1285                                   unsigned clear_flags, double depth, unsigned stencil,
1286                                   unsigned dstx, unsigned dsty, unsigned width, unsigned height,
1287                                   bool render_condition_enabled)
1288{
1289   struct si_context *sctx = (struct si_context *)ctx;
1290   union pipe_color_union unused = {};
1291
1292   /* Fast path that just clears HTILE. */
1293   if (si_try_normal_clear(sctx, dst, dstx, dsty, width, height, render_condition_enabled,
1294                           clear_flags, &unused, depth, stencil))
1295      return;
1296
1297   si_blitter_begin(sctx,
1298                    SI_CLEAR_SURFACE | (render_condition_enabled ? 0 : SI_DISABLE_RENDER_COND));
1299   util_blitter_clear_depth_stencil(sctx->blitter, dst, clear_flags, depth, stencil, dstx, dsty,
1300                                    width, height);
1301   si_blitter_end(sctx);
1302}
1303
1304static void si_clear_texture(struct pipe_context *pipe, struct pipe_resource *tex, unsigned level,
1305                             const struct pipe_box *box, const void *data)
1306{
1307   struct pipe_screen *screen = pipe->screen;
1308   struct si_texture *stex = (struct si_texture *)tex;
1309   struct pipe_surface tmpl = {{0}};
1310   struct pipe_surface *sf;
1311
1312   tmpl.format = tex->format;
1313   tmpl.u.tex.first_layer = box->z;
1314   tmpl.u.tex.last_layer = box->z + box->depth - 1;
1315   tmpl.u.tex.level = level;
1316   sf = pipe->create_surface(pipe, tex, &tmpl);
1317   if (!sf)
1318      return;
1319
1320   if (stex->is_depth) {
1321      unsigned clear;
1322      float depth;
1323      uint8_t stencil = 0;
1324
1325      /* Depth is always present. */
1326      clear = PIPE_CLEAR_DEPTH;
1327      util_format_unpack_z_float(tex->format, &depth, data, 1);
1328
1329      if (stex->surface.has_stencil) {
1330         clear |= PIPE_CLEAR_STENCIL;
1331         util_format_unpack_s_8uint(tex->format, &stencil, data, 1);
1332      }
1333
1334      si_clear_depth_stencil(pipe, sf, clear, depth, stencil, box->x, box->y, box->width,
1335                             box->height, false);
1336   } else {
1337      union pipe_color_union color;
1338
1339      util_format_unpack_rgba(tex->format, color.ui, data, 1);
1340
1341      if (screen->is_format_supported(screen, tex->format, tex->target, 0, 0,
1342                                      PIPE_BIND_RENDER_TARGET)) {
1343         si_clear_render_target(pipe, sf, &color, box->x, box->y, box->width, box->height, false);
1344      } else {
1345         /* Software fallback - just for R9G9B9E5_FLOAT */
1346         util_clear_render_target(pipe, sf, &color, box->x, box->y, box->width, box->height);
1347      }
1348   }
1349   pipe_surface_reference(&sf, NULL);
1350}
1351
1352void si_init_clear_functions(struct si_context *sctx)
1353{
1354   sctx->b.clear_render_target = si_clear_render_target;
1355   sctx->b.clear_texture = si_clear_texture;
1356
1357   if (sctx->has_graphics) {
1358      sctx->b.clear = si_clear;
1359      sctx->b.clear_depth_stencil = si_clear_depth_stencil;
1360   }
1361}
1362