1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright 2019-2020 Valve Corporation
3bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * Authors:
6bf215546Sopenharmony_ci *    Jonathan Marek <jonathan@marek.ca>
7bf215546Sopenharmony_ci */
8bf215546Sopenharmony_ci
9bf215546Sopenharmony_ci#include "tu_clear_blit.h"
10bf215546Sopenharmony_ci
11bf215546Sopenharmony_ci#include "ir3/ir3_nir.h"
12bf215546Sopenharmony_ci
13bf215546Sopenharmony_ci#include "util/format_r11g11b10f.h"
14bf215546Sopenharmony_ci#include "util/format_rgb9e5.h"
15bf215546Sopenharmony_ci#include "util/format_srgb.h"
16bf215546Sopenharmony_ci#include "util/half_float.h"
17bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h"
18bf215546Sopenharmony_ci
19bf215546Sopenharmony_ci#include "tu_cmd_buffer.h"
20bf215546Sopenharmony_ci#include "tu_cs.h"
21bf215546Sopenharmony_ci#include "tu_formats.h"
22bf215546Sopenharmony_ci#include "tu_image.h"
23bf215546Sopenharmony_ci#include "tu_tracepoints.h"
24bf215546Sopenharmony_ci
/* Packs a float into an unsigned normalized integer that is "bits" wide:
 * the value is clamped to [0, 1], scaled by (2^bits - 1) and rounded with
 * _mesa_lroundevenf().
 */
static uint32_t
tu_pack_float32_for_unorm(float val, int bits)
{
   const float unorm_max = (float) ((1 << bits) - 1);
   const float saturated = CLAMP(val, 0.0f, 1.0f);

   return _mesa_lroundevenf(saturated * unorm_max);
}
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_ci/* r2d_ = BLIT_OP_SCALE operations */
32bf215546Sopenharmony_ci
33bf215546Sopenharmony_cistatic enum a6xx_2d_ifmt
34bf215546Sopenharmony_ciformat_to_ifmt(enum pipe_format format)
35bf215546Sopenharmony_ci{
36bf215546Sopenharmony_ci   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
37bf215546Sopenharmony_ci       format == PIPE_FORMAT_Z24X8_UNORM)
38bf215546Sopenharmony_ci      return R2D_UNORM8;
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci   /* get_component_bits doesn't work with depth/stencil formats: */
41bf215546Sopenharmony_ci   if (format == PIPE_FORMAT_Z16_UNORM || format == PIPE_FORMAT_Z32_FLOAT)
42bf215546Sopenharmony_ci      return R2D_FLOAT32;
43bf215546Sopenharmony_ci   if (format == PIPE_FORMAT_S8_UINT)
44bf215546Sopenharmony_ci      return R2D_INT8;
45bf215546Sopenharmony_ci   if (format == PIPE_FORMAT_A8_UNORM)
46bf215546Sopenharmony_ci      return R2D_UNORM8;
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci   /* use the size of the red channel to find the corresponding "ifmt" */
49bf215546Sopenharmony_ci   bool is_int = util_format_is_pure_integer(format);
50bf215546Sopenharmony_ci   switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
51bf215546Sopenharmony_ci   case 4: case 5: case 8:
52bf215546Sopenharmony_ci      return is_int ? R2D_INT8 : R2D_UNORM8;
53bf215546Sopenharmony_ci   case 10: case 11:
54bf215546Sopenharmony_ci      return is_int ? R2D_INT16 : R2D_FLOAT16;
55bf215546Sopenharmony_ci   case 16:
56bf215546Sopenharmony_ci      if (util_format_is_float(format))
57bf215546Sopenharmony_ci         return R2D_FLOAT16;
58bf215546Sopenharmony_ci      return is_int ? R2D_INT16 : R2D_FLOAT32;
59bf215546Sopenharmony_ci   case 32:
60bf215546Sopenharmony_ci      return is_int ? R2D_INT32 : R2D_FLOAT32;
61bf215546Sopenharmony_ci    default:
62bf215546Sopenharmony_ci      unreachable("bad format");
63bf215546Sopenharmony_ci      return 0;
64bf215546Sopenharmony_ci   }
65bf215546Sopenharmony_ci}
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_cistatic void
68bf215546Sopenharmony_cir2d_coords(struct tu_cs *cs,
69bf215546Sopenharmony_ci           const VkOffset2D *dst,
70bf215546Sopenharmony_ci           const VkOffset2D *src,
71bf215546Sopenharmony_ci           const VkExtent2D *extent)
72bf215546Sopenharmony_ci{
73bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
74bf215546Sopenharmony_ci      A6XX_GRAS_2D_DST_TL(.x = dst->x,                     .y = dst->y),
75bf215546Sopenharmony_ci      A6XX_GRAS_2D_DST_BR(.x = dst->x + extent->width - 1, .y = dst->y + extent->height - 1));
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   if (!src)
78bf215546Sopenharmony_ci      return;
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
81bf215546Sopenharmony_ci                   A6XX_GRAS_2D_SRC_TL_X(src->x),
82bf215546Sopenharmony_ci                   A6XX_GRAS_2D_SRC_BR_X(src->x + extent->width - 1),
83bf215546Sopenharmony_ci                   A6XX_GRAS_2D_SRC_TL_Y(src->y),
84bf215546Sopenharmony_ci                   A6XX_GRAS_2D_SRC_BR_Y(src->y + extent->height - 1));
85bf215546Sopenharmony_ci}
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_cistatic void
88bf215546Sopenharmony_cir2d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val)
89bf215546Sopenharmony_ci{
90bf215546Sopenharmony_ci   uint32_t clear_value[4] = {};
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci   switch (format) {
93bf215546Sopenharmony_ci   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
94bf215546Sopenharmony_ci   case PIPE_FORMAT_Z24X8_UNORM:
95bf215546Sopenharmony_ci      /* cleared as r8g8b8a8_unorm using special format */
96bf215546Sopenharmony_ci      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
97bf215546Sopenharmony_ci      clear_value[1] = clear_value[0] >> 8;
98bf215546Sopenharmony_ci      clear_value[2] = clear_value[0] >> 16;
99bf215546Sopenharmony_ci      clear_value[3] = val->depthStencil.stencil;
100bf215546Sopenharmony_ci      break;
101bf215546Sopenharmony_ci   case PIPE_FORMAT_Z16_UNORM:
102bf215546Sopenharmony_ci   case PIPE_FORMAT_Z32_FLOAT:
103bf215546Sopenharmony_ci      /* R2D_FLOAT32 */
104bf215546Sopenharmony_ci      clear_value[0] = fui(val->depthStencil.depth);
105bf215546Sopenharmony_ci      break;
106bf215546Sopenharmony_ci   case PIPE_FORMAT_S8_UINT:
107bf215546Sopenharmony_ci      clear_value[0] = val->depthStencil.stencil;
108bf215546Sopenharmony_ci      break;
109bf215546Sopenharmony_ci   case PIPE_FORMAT_R9G9B9E5_FLOAT:
110bf215546Sopenharmony_ci      /* cleared as UINT32 */
111bf215546Sopenharmony_ci      clear_value[0] = float3_to_rgb9e5(val->color.float32);
112bf215546Sopenharmony_ci      break;
113bf215546Sopenharmony_ci   default:
114bf215546Sopenharmony_ci      assert(!util_format_is_depth_or_stencil(format));
115bf215546Sopenharmony_ci      const struct util_format_description *desc = util_format_description(format);
116bf215546Sopenharmony_ci      enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
117bf215546Sopenharmony_ci
118bf215546Sopenharmony_ci      assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
119bf215546Sopenharmony_ci             format == PIPE_FORMAT_R11G11B10_FLOAT);
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_ci      for (unsigned i = 0; i < desc->nr_channels; i++) {
122bf215546Sopenharmony_ci         const struct util_format_channel_description *ch = &desc->channel[i];
123bf215546Sopenharmony_ci         if (ifmt == R2D_UNORM8) {
124bf215546Sopenharmony_ci            float linear = val->color.float32[i];
125bf215546Sopenharmony_ci            if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3)
126bf215546Sopenharmony_ci               linear = util_format_linear_to_srgb_float(val->color.float32[i]);
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci            if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
129bf215546Sopenharmony_ci               clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f);
130bf215546Sopenharmony_ci            else
131bf215546Sopenharmony_ci               clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
132bf215546Sopenharmony_ci         } else if (ifmt == R2D_FLOAT16) {
133bf215546Sopenharmony_ci            clear_value[i] = _mesa_float_to_half(val->color.float32[i]);
134bf215546Sopenharmony_ci         } else {
135bf215546Sopenharmony_ci            assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 ||
136bf215546Sopenharmony_ci                   ifmt == R2D_INT16 || ifmt == R2D_INT8);
137bf215546Sopenharmony_ci            clear_value[i] = val->color.uint32[i];
138bf215546Sopenharmony_ci         }
139bf215546Sopenharmony_ci      }
140bf215546Sopenharmony_ci      break;
141bf215546Sopenharmony_ci   }
142bf215546Sopenharmony_ci
143bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
144bf215546Sopenharmony_ci   tu_cs_emit_array(cs, clear_value, 4);
145bf215546Sopenharmony_ci}
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_cistatic void
148bf215546Sopenharmony_cifixup_src_format(enum pipe_format *src_format, enum pipe_format dst_format,
149bf215546Sopenharmony_ci                 enum a6xx_format *fmt)
150bf215546Sopenharmony_ci{
151bf215546Sopenharmony_ci   /* When blitting S8 -> D24S8 or vice versa, we have to override S8, which
152bf215546Sopenharmony_ci    * is normally R8_UINT for sampling/blitting purposes, to a unorm format.
153bf215546Sopenharmony_ci    * We also have to move stencil, which is normally in the .w channel, into
154bf215546Sopenharmony_ci    * the right channel. Reintepreting the S8 texture as A8_UNORM solves both
155bf215546Sopenharmony_ci    * problems, and avoids using a swap, which seems to sometimes not work
156bf215546Sopenharmony_ci    * with a D24S8 source, or a texture swizzle which is only supported with
157bf215546Sopenharmony_ci    * the 3d path. Sometimes this blit happens on already-constructed
158bf215546Sopenharmony_ci    * fdl6_view's, e.g. for sysmem resolves, so this has to happen as a fixup.
159bf215546Sopenharmony_ci    */
160bf215546Sopenharmony_ci   if (*src_format == PIPE_FORMAT_S8_UINT &&
161bf215546Sopenharmony_ci       (dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
162bf215546Sopenharmony_ci        dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)) {
163bf215546Sopenharmony_ci      *fmt = FMT6_A8_UNORM;
164bf215546Sopenharmony_ci      *src_format = PIPE_FORMAT_A8_UNORM;
165bf215546Sopenharmony_ci   }
166bf215546Sopenharmony_ci}
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_cistatic void
169bf215546Sopenharmony_cifixup_dst_format(enum pipe_format src_format, enum pipe_format *dst_format,
170bf215546Sopenharmony_ci                 enum a6xx_format *fmt)
171bf215546Sopenharmony_ci{
172bf215546Sopenharmony_ci   if (*dst_format == PIPE_FORMAT_S8_UINT &&
173bf215546Sopenharmony_ci       (src_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
174bf215546Sopenharmony_ci        src_format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)) {
175bf215546Sopenharmony_ci      *dst_format = PIPE_FORMAT_A8_UNORM;
176bf215546Sopenharmony_ci      *fmt = FMT6_A8_UNORM;
177bf215546Sopenharmony_ci   }
178bf215546Sopenharmony_ci}
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_cistatic void
181bf215546Sopenharmony_cir2d_src(struct tu_cmd_buffer *cmd,
182bf215546Sopenharmony_ci        struct tu_cs *cs,
183bf215546Sopenharmony_ci        const struct fdl6_view *iview,
184bf215546Sopenharmony_ci        uint32_t layer,
185bf215546Sopenharmony_ci        VkFilter filter,
186bf215546Sopenharmony_ci        enum pipe_format dst_format)
187bf215546Sopenharmony_ci{
188bf215546Sopenharmony_ci   uint32_t src_info = iview->SP_PS_2D_SRC_INFO;
189bf215546Sopenharmony_ci   if (filter != VK_FILTER_NEAREST)
190bf215546Sopenharmony_ci      src_info |= A6XX_SP_PS_2D_SRC_INFO_FILTER;
191bf215546Sopenharmony_ci
192bf215546Sopenharmony_ci   enum a6xx_format fmt = (src_info & A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK);
193bf215546Sopenharmony_ci   enum pipe_format src_format = iview->format;
194bf215546Sopenharmony_ci   fixup_src_format(&src_format, dst_format, &fmt);
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   src_info =
197bf215546Sopenharmony_ci      (src_info & ~A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK) |
198bf215546Sopenharmony_ci      A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(fmt);
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
201bf215546Sopenharmony_ci   tu_cs_emit(cs, src_info);
202bf215546Sopenharmony_ci   tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE);
203bf215546Sopenharmony_ci   tu_cs_image_ref_2d(cs, iview, layer, true);
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3);
206bf215546Sopenharmony_ci   tu_cs_image_flag_ref(cs, iview, layer);
207bf215546Sopenharmony_ci}
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_cistatic void
210bf215546Sopenharmony_cir2d_src_depth(struct tu_cmd_buffer *cmd,
211bf215546Sopenharmony_ci                struct tu_cs *cs,
212bf215546Sopenharmony_ci                const struct tu_image_view *iview,
213bf215546Sopenharmony_ci                uint32_t layer,
214bf215546Sopenharmony_ci                VkFilter filter)
215bf215546Sopenharmony_ci{
216bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
217bf215546Sopenharmony_ci   tu_cs_emit(cs, tu_image_view_depth(iview, SP_PS_2D_SRC_INFO));
218bf215546Sopenharmony_ci   tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE);
219bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer);
220bf215546Sopenharmony_ci   /* SP_PS_2D_SRC_PITCH has shifted pitch field */
221bf215546Sopenharmony_ci   tu_cs_emit(cs, iview->depth_PITCH << 9);
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3);
224bf215546Sopenharmony_ci   tu_cs_image_flag_ref(cs, &iview->view, layer);
225bf215546Sopenharmony_ci}
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_cistatic void
228bf215546Sopenharmony_cir2d_src_stencil(struct tu_cmd_buffer *cmd,
229bf215546Sopenharmony_ci                struct tu_cs *cs,
230bf215546Sopenharmony_ci                const struct tu_image_view *iview,
231bf215546Sopenharmony_ci                uint32_t layer,
232bf215546Sopenharmony_ci                VkFilter filter)
233bf215546Sopenharmony_ci{
234bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
235bf215546Sopenharmony_ci   tu_cs_emit(cs, tu_image_view_stencil(iview, SP_PS_2D_SRC_INFO) & ~A6XX_SP_PS_2D_SRC_INFO_FLAGS);
236bf215546Sopenharmony_ci   tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE);
237bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
238bf215546Sopenharmony_ci   /* SP_PS_2D_SRC_PITCH has shifted pitch field */
239bf215546Sopenharmony_ci   tu_cs_emit(cs, iview->stencil_PITCH << 9);
240bf215546Sopenharmony_ci}
241bf215546Sopenharmony_ci
242bf215546Sopenharmony_cistatic void
243bf215546Sopenharmony_cir2d_src_buffer(struct tu_cmd_buffer *cmd,
244bf215546Sopenharmony_ci               struct tu_cs *cs,
245bf215546Sopenharmony_ci               enum pipe_format format,
246bf215546Sopenharmony_ci               uint64_t va, uint32_t pitch,
247bf215546Sopenharmony_ci               uint32_t width, uint32_t height,
248bf215546Sopenharmony_ci               enum pipe_format dst_format)
249bf215546Sopenharmony_ci{
250bf215546Sopenharmony_ci   struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR);
251bf215546Sopenharmony_ci   enum a6xx_format color_format = fmt.fmt;
252bf215546Sopenharmony_ci   fixup_src_format(&format, dst_format, &color_format);
253bf215546Sopenharmony_ci
254bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
255bf215546Sopenharmony_ci                   A6XX_SP_PS_2D_SRC_INFO(
256bf215546Sopenharmony_ci                      .color_format = color_format,
257bf215546Sopenharmony_ci                      .color_swap = fmt.swap,
258bf215546Sopenharmony_ci                      .srgb = util_format_is_srgb(format),
259bf215546Sopenharmony_ci                      .unk20 = 1,
260bf215546Sopenharmony_ci                      .unk22 = 1),
261bf215546Sopenharmony_ci                   A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height),
262bf215546Sopenharmony_ci                   A6XX_SP_PS_2D_SRC(.qword = va),
263bf215546Sopenharmony_ci                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = pitch));
264bf215546Sopenharmony_ci}
265bf215546Sopenharmony_ci
266bf215546Sopenharmony_cistatic void
267bf215546Sopenharmony_cir2d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
268bf215546Sopenharmony_ci        enum pipe_format src_format)
269bf215546Sopenharmony_ci{
270bf215546Sopenharmony_ci   uint32_t dst_info = iview->RB_2D_DST_INFO;
271bf215546Sopenharmony_ci   enum a6xx_format fmt = dst_info & A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK;
272bf215546Sopenharmony_ci   enum pipe_format dst_format = iview->format;
273bf215546Sopenharmony_ci   fixup_dst_format(src_format, &dst_format, &fmt);
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_ci   dst_info =
276bf215546Sopenharmony_ci         (dst_info & ~A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK) | fmt;
277bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
278bf215546Sopenharmony_ci   tu_cs_emit(cs, dst_info);
279bf215546Sopenharmony_ci   tu_cs_image_ref_2d(cs, iview, layer, false);
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3);
282bf215546Sopenharmony_ci   tu_cs_image_flag_ref(cs, iview, layer);
283bf215546Sopenharmony_ci}
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_cistatic void
286bf215546Sopenharmony_cir2d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
287bf215546Sopenharmony_ci{
288bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
289bf215546Sopenharmony_ci   tu_cs_emit(cs, tu_image_view_depth(iview, RB_2D_DST_INFO));
290bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer);
291bf215546Sopenharmony_ci   tu_cs_emit(cs, iview->depth_PITCH);
292bf215546Sopenharmony_ci
293bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3);
294bf215546Sopenharmony_ci   tu_cs_image_flag_ref(cs, &iview->view, layer);
295bf215546Sopenharmony_ci}
296bf215546Sopenharmony_ci
297bf215546Sopenharmony_cistatic void
298bf215546Sopenharmony_cir2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
299bf215546Sopenharmony_ci{
300bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
301bf215546Sopenharmony_ci   tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS);
302bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
303bf215546Sopenharmony_ci   tu_cs_emit(cs, iview->stencil_PITCH);
304bf215546Sopenharmony_ci}
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_cistatic void
307bf215546Sopenharmony_cir2d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch,
308bf215546Sopenharmony_ci               enum pipe_format src_format)
309bf215546Sopenharmony_ci{
310bf215546Sopenharmony_ci   struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR);
311bf215546Sopenharmony_ci   enum a6xx_format color_fmt = fmt.fmt;
312bf215546Sopenharmony_ci   fixup_dst_format(src_format, &format, &color_fmt);
313bf215546Sopenharmony_ci   fmt.fmt = color_fmt;
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
316bf215546Sopenharmony_ci                   A6XX_RB_2D_DST_INFO(
317bf215546Sopenharmony_ci                      .color_format = fmt.fmt,
318bf215546Sopenharmony_ci                      .color_swap = fmt.swap,
319bf215546Sopenharmony_ci                      .srgb = util_format_is_srgb(format)),
320bf215546Sopenharmony_ci                   A6XX_RB_2D_DST(.qword = va),
321bf215546Sopenharmony_ci                   A6XX_RB_2D_DST_PITCH(pitch));
322bf215546Sopenharmony_ci}
323bf215546Sopenharmony_ci
324bf215546Sopenharmony_cistatic void
325bf215546Sopenharmony_cir2d_setup_common(struct tu_cmd_buffer *cmd,
326bf215546Sopenharmony_ci                 struct tu_cs *cs,
327bf215546Sopenharmony_ci                 enum pipe_format src_format,
328bf215546Sopenharmony_ci                 enum pipe_format dst_format,
329bf215546Sopenharmony_ci                 VkImageAspectFlags aspect_mask,
330bf215546Sopenharmony_ci                 unsigned blit_param,
331bf215546Sopenharmony_ci                 bool clear,
332bf215546Sopenharmony_ci                 bool ubwc,
333bf215546Sopenharmony_ci                 bool scissor)
334bf215546Sopenharmony_ci{
335bf215546Sopenharmony_ci   enum a6xx_format fmt = tu6_base_format(dst_format);
336bf215546Sopenharmony_ci   fixup_dst_format(src_format, &dst_format, &fmt);
337bf215546Sopenharmony_ci   enum a6xx_2d_ifmt ifmt = format_to_ifmt(dst_format);
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci   uint32_t unknown_8c01 = 0;
340bf215546Sopenharmony_ci
341bf215546Sopenharmony_ci   if ((dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
342bf215546Sopenharmony_ci       dst_format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) {
343bf215546Sopenharmony_ci      fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
344bf215546Sopenharmony_ci   }
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci   /* note: the only format with partial clearing is D24S8 */
347bf215546Sopenharmony_ci   if (dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
348bf215546Sopenharmony_ci      /* preserve stencil channel */
349bf215546Sopenharmony_ci      if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
350bf215546Sopenharmony_ci         unknown_8c01 = 0x08000041;
351bf215546Sopenharmony_ci      /* preserve depth channels */
352bf215546Sopenharmony_ci      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
353bf215546Sopenharmony_ci         unknown_8c01 = 0x00084001;
354bf215546Sopenharmony_ci   }
355bf215546Sopenharmony_ci
356bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
357bf215546Sopenharmony_ci   tu_cs_emit(cs, unknown_8c01);
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci   uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL(
360bf215546Sopenharmony_ci         .scissor = scissor,
361bf215546Sopenharmony_ci         .rotate = blit_param,
362bf215546Sopenharmony_ci         .solid_color = clear,
363bf215546Sopenharmony_ci         .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
364bf215546Sopenharmony_ci         .color_format = fmt,
365bf215546Sopenharmony_ci         .mask = 0xf,
366bf215546Sopenharmony_ci         .ifmt = util_format_is_srgb(dst_format) ? R2D_UNORM8_SRGB : ifmt,
367bf215546Sopenharmony_ci      ).value;
368bf215546Sopenharmony_ci
369bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
370bf215546Sopenharmony_ci   tu_cs_emit(cs, blit_cntl);
371bf215546Sopenharmony_ci
372bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
373bf215546Sopenharmony_ci   tu_cs_emit(cs, blit_cntl);
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_ci   if (fmt == FMT6_10_10_10_2_UNORM_DEST)
376bf215546Sopenharmony_ci      fmt = FMT6_16_16_16_16_FLOAT;
377bf215546Sopenharmony_ci
378bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_SP_2D_DST_FORMAT(
379bf215546Sopenharmony_ci         .sint = util_format_is_pure_sint(dst_format),
380bf215546Sopenharmony_ci         .uint = util_format_is_pure_uint(dst_format),
381bf215546Sopenharmony_ci         .color_format = fmt,
382bf215546Sopenharmony_ci         .srgb = util_format_is_srgb(dst_format),
383bf215546Sopenharmony_ci         .mask = 0xf));
384bf215546Sopenharmony_ci}
385bf215546Sopenharmony_ci
386bf215546Sopenharmony_cistatic void
387bf215546Sopenharmony_cir2d_setup(struct tu_cmd_buffer *cmd,
388bf215546Sopenharmony_ci          struct tu_cs *cs,
389bf215546Sopenharmony_ci          enum pipe_format src_format,
390bf215546Sopenharmony_ci          enum pipe_format dst_format,
391bf215546Sopenharmony_ci          VkImageAspectFlags aspect_mask,
392bf215546Sopenharmony_ci          unsigned blit_param,
393bf215546Sopenharmony_ci          bool clear,
394bf215546Sopenharmony_ci          bool ubwc,
395bf215546Sopenharmony_ci          VkSampleCountFlagBits samples)
396bf215546Sopenharmony_ci{
397bf215546Sopenharmony_ci   assert(samples == VK_SAMPLE_COUNT_1_BIT);
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_ci   if (!cmd->state.pass) {
400bf215546Sopenharmony_ci      tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
401bf215546Sopenharmony_ci   }
402bf215546Sopenharmony_ci
403bf215546Sopenharmony_ci   r2d_setup_common(cmd, cs, src_format, dst_format, aspect_mask, blit_param, clear, ubwc, false);
404bf215546Sopenharmony_ci}
405bf215546Sopenharmony_ci
/* The 2D blit path has nothing to undo after an operation, so this is
 * intentionally a no-op.
 */
static void
r2d_teardown(struct tu_cmd_buffer *cmd,
             struct tu_cs *cs)
{
   (void) cmd;
   (void) cs;
}
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_cistatic void
414bf215546Sopenharmony_cir2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
415bf215546Sopenharmony_ci{
416bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
417bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
418bf215546Sopenharmony_ci}
419bf215546Sopenharmony_ci
420bf215546Sopenharmony_ci/* r3d_ = shader path operations */
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_cistatic nir_ssa_def *
423bf215546Sopenharmony_ciload_const(nir_builder *b, unsigned base, unsigned components)
424bf215546Sopenharmony_ci{
425bf215546Sopenharmony_ci   return nir_load_uniform(b, components, 32, nir_imm_int(b, 0),
426bf215546Sopenharmony_ci                           .base = base);
427bf215546Sopenharmony_ci}
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_cistatic nir_shader *
430bf215546Sopenharmony_cibuild_blit_vs_shader(void)
431bf215546Sopenharmony_ci{
432bf215546Sopenharmony_ci   nir_builder _b =
433bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "blit vs");
434bf215546Sopenharmony_ci   nir_builder *b = &_b;
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci   nir_variable *out_pos =
437bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(),
438bf215546Sopenharmony_ci                          "gl_Position");
439bf215546Sopenharmony_ci   out_pos->data.location = VARYING_SLOT_POS;
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci   nir_ssa_def *vert0_pos = load_const(b, 0, 2);
442bf215546Sopenharmony_ci   nir_ssa_def *vert1_pos = load_const(b, 4, 2);
443bf215546Sopenharmony_ci   nir_ssa_def *vertex = nir_load_vertex_id(b);
444bf215546Sopenharmony_ci
445bf215546Sopenharmony_ci   nir_ssa_def *pos = nir_bcsel(b, nir_i2b1(b, vertex), vert1_pos, vert0_pos);
446bf215546Sopenharmony_ci   pos = nir_vec4(b, nir_channel(b, pos, 0),
447bf215546Sopenharmony_ci                     nir_channel(b, pos, 1),
448bf215546Sopenharmony_ci                     nir_imm_float(b, 0.0),
449bf215546Sopenharmony_ci                     nir_imm_float(b, 1.0));
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci   nir_store_var(b, out_pos, pos, 0xf);
452bf215546Sopenharmony_ci
453bf215546Sopenharmony_ci   nir_variable *out_coords =
454bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_out, glsl_vec_type(3),
455bf215546Sopenharmony_ci                          "coords");
456bf215546Sopenharmony_ci   out_coords->data.location = VARYING_SLOT_VAR0;
457bf215546Sopenharmony_ci
458bf215546Sopenharmony_ci   nir_ssa_def *vert0_coords = load_const(b, 2, 2);
459bf215546Sopenharmony_ci   nir_ssa_def *vert1_coords = load_const(b, 6, 2);
460bf215546Sopenharmony_ci
461bf215546Sopenharmony_ci   /* Only used with "z scale" blit path which uses a 3d texture */
462bf215546Sopenharmony_ci   nir_ssa_def *z_coord = load_const(b, 8, 1);
463bf215546Sopenharmony_ci
464bf215546Sopenharmony_ci   nir_ssa_def *coords = nir_bcsel(b, nir_i2b1(b, vertex), vert1_coords, vert0_coords);
465bf215546Sopenharmony_ci   coords = nir_vec3(b, nir_channel(b, coords, 0), nir_channel(b, coords, 1),
466bf215546Sopenharmony_ci                     z_coord);
467bf215546Sopenharmony_ci
468bf215546Sopenharmony_ci   nir_store_var(b, out_coords, coords, 0x7);
469bf215546Sopenharmony_ci
470bf215546Sopenharmony_ci   return b->shader;
471bf215546Sopenharmony_ci}
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_cistatic nir_shader *
474bf215546Sopenharmony_cibuild_clear_vs_shader(void)
475bf215546Sopenharmony_ci{
476bf215546Sopenharmony_ci   nir_builder _b =
477bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "blit vs");
478bf215546Sopenharmony_ci   nir_builder *b = &_b;
479bf215546Sopenharmony_ci
480bf215546Sopenharmony_ci   nir_variable *out_pos =
481bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(),
482bf215546Sopenharmony_ci                          "gl_Position");
483bf215546Sopenharmony_ci   out_pos->data.location = VARYING_SLOT_POS;
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci   nir_ssa_def *vert0_pos = load_const(b, 0, 2);
486bf215546Sopenharmony_ci   nir_ssa_def *vert1_pos = load_const(b, 4, 2);
487bf215546Sopenharmony_ci   /* c0.z is used to clear depth */
488bf215546Sopenharmony_ci   nir_ssa_def *depth = load_const(b, 2, 1);
489bf215546Sopenharmony_ci   nir_ssa_def *vertex = nir_load_vertex_id(b);
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci   nir_ssa_def *pos = nir_bcsel(b, nir_i2b1(b, vertex), vert1_pos, vert0_pos);
492bf215546Sopenharmony_ci   pos = nir_vec4(b, nir_channel(b, pos, 0),
493bf215546Sopenharmony_ci                     nir_channel(b, pos, 1),
494bf215546Sopenharmony_ci                     depth, nir_imm_float(b, 1.0));
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci   nir_store_var(b, out_pos, pos, 0xf);
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   nir_variable *out_layer =
499bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_out, glsl_uint_type(),
500bf215546Sopenharmony_ci                          "gl_Layer");
501bf215546Sopenharmony_ci   out_layer->data.location = VARYING_SLOT_LAYER;
502bf215546Sopenharmony_ci   nir_ssa_def *layer = load_const(b, 3, 1);
503bf215546Sopenharmony_ci   nir_store_var(b, out_layer, layer, 1);
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci   return b->shader;
506bf215546Sopenharmony_ci}
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_cistatic nir_shader *
509bf215546Sopenharmony_cibuild_blit_fs_shader(bool zscale)
510bf215546Sopenharmony_ci{
511bf215546Sopenharmony_ci   nir_builder _b =
512bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
513bf215546Sopenharmony_ci                                     zscale ? "zscale blit fs" : "blit fs");
514bf215546Sopenharmony_ci   nir_builder *b = &_b;
515bf215546Sopenharmony_ci
516bf215546Sopenharmony_ci   nir_variable *out_color =
517bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(),
518bf215546Sopenharmony_ci                          "color0");
519bf215546Sopenharmony_ci   out_color->data.location = FRAG_RESULT_DATA0;
520bf215546Sopenharmony_ci
521bf215546Sopenharmony_ci   unsigned coord_components = zscale ? 3 : 2;
522bf215546Sopenharmony_ci   nir_variable *in_coords =
523bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_in,
524bf215546Sopenharmony_ci                          glsl_vec_type(coord_components),
525bf215546Sopenharmony_ci                          "coords");
526bf215546Sopenharmony_ci   in_coords->data.location = VARYING_SLOT_VAR0;
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1);
529bf215546Sopenharmony_ci   /* Note: since we're just copying data, we rely on the HW ignoring the
530bf215546Sopenharmony_ci    * dest_type.
531bf215546Sopenharmony_ci    */
532bf215546Sopenharmony_ci   tex->dest_type = nir_type_int32;
533bf215546Sopenharmony_ci   tex->is_array = false;
534bf215546Sopenharmony_ci   tex->is_shadow = false;
535bf215546Sopenharmony_ci   tex->sampler_dim = zscale ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci   tex->texture_index = 0;
538bf215546Sopenharmony_ci   tex->sampler_index = 0;
539bf215546Sopenharmony_ci
540bf215546Sopenharmony_ci   b->shader->info.num_textures = 1;
541bf215546Sopenharmony_ci   BITSET_SET(b->shader->info.textures_used, 0);
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci   tex->src[0].src_type = nir_tex_src_coord;
544bf215546Sopenharmony_ci   tex->src[0].src = nir_src_for_ssa(nir_load_var(b, in_coords));
545bf215546Sopenharmony_ci   tex->coord_components = coord_components;
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_ci   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
548bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &tex->instr);
549bf215546Sopenharmony_ci
550bf215546Sopenharmony_ci   nir_store_var(b, out_color, &tex->dest.ssa, 0xf);
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci   return b->shader;
553bf215546Sopenharmony_ci}
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_ci/* We can only read multisample textures via txf_ms, so we need a separate
556bf215546Sopenharmony_ci * variant for them.
557bf215546Sopenharmony_ci */
558bf215546Sopenharmony_cistatic nir_shader *
559bf215546Sopenharmony_cibuild_ms_copy_fs_shader(void)
560bf215546Sopenharmony_ci{
561bf215546Sopenharmony_ci   nir_builder _b =
562bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
563bf215546Sopenharmony_ci                                     "multisample copy fs");
564bf215546Sopenharmony_ci   nir_builder *b = &_b;
565bf215546Sopenharmony_ci
566bf215546Sopenharmony_ci   nir_variable *out_color =
567bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(),
568bf215546Sopenharmony_ci                          "color0");
569bf215546Sopenharmony_ci   out_color->data.location = FRAG_RESULT_DATA0;
570bf215546Sopenharmony_ci
571bf215546Sopenharmony_ci   nir_variable *in_coords =
572bf215546Sopenharmony_ci      nir_variable_create(b->shader, nir_var_shader_in,
573bf215546Sopenharmony_ci                          glsl_vec_type(2),
574bf215546Sopenharmony_ci                          "coords");
575bf215546Sopenharmony_ci   in_coords->data.location = VARYING_SLOT_VAR0;
576bf215546Sopenharmony_ci
577bf215546Sopenharmony_ci   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2);
578bf215546Sopenharmony_ci
579bf215546Sopenharmony_ci   tex->op = nir_texop_txf_ms;
580bf215546Sopenharmony_ci
581bf215546Sopenharmony_ci   /* Note: since we're just copying data, we rely on the HW ignoring the
582bf215546Sopenharmony_ci    * dest_type.
583bf215546Sopenharmony_ci    */
584bf215546Sopenharmony_ci   tex->dest_type = nir_type_int32;
585bf215546Sopenharmony_ci   tex->is_array = false;
586bf215546Sopenharmony_ci   tex->is_shadow = false;
587bf215546Sopenharmony_ci   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
588bf215546Sopenharmony_ci
589bf215546Sopenharmony_ci   tex->texture_index = 0;
590bf215546Sopenharmony_ci   tex->sampler_index = 0;
591bf215546Sopenharmony_ci
592bf215546Sopenharmony_ci   b->shader->info.num_textures = 1;
593bf215546Sopenharmony_ci   BITSET_SET(b->shader->info.textures_used, 0);
594bf215546Sopenharmony_ci   BITSET_SET(b->shader->info.textures_used_by_txf, 0);
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci   nir_ssa_def *coord = nir_f2i32(b, nir_load_var(b, in_coords));
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   tex->src[0].src_type = nir_tex_src_coord;
599bf215546Sopenharmony_ci   tex->src[0].src = nir_src_for_ssa(coord);
600bf215546Sopenharmony_ci   tex->coord_components = 2;
601bf215546Sopenharmony_ci
602bf215546Sopenharmony_ci   tex->src[1].src_type = nir_tex_src_ms_index;
603bf215546Sopenharmony_ci   tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b));
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_ci   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL);
606bf215546Sopenharmony_ci   nir_builder_instr_insert(b, &tex->instr);
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci   nir_store_var(b, out_color, &tex->dest.ssa, 0xf);
609bf215546Sopenharmony_ci
610bf215546Sopenharmony_ci   return b->shader;
611bf215546Sopenharmony_ci}
612bf215546Sopenharmony_ci
613bf215546Sopenharmony_cistatic nir_shader *
614bf215546Sopenharmony_cibuild_clear_fs_shader(unsigned mrts)
615bf215546Sopenharmony_ci{
616bf215546Sopenharmony_ci   nir_builder _b =
617bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,
618bf215546Sopenharmony_ci                                     "mrt%u clear fs", mrts);
619bf215546Sopenharmony_ci   nir_builder *b = &_b;
620bf215546Sopenharmony_ci
621bf215546Sopenharmony_ci   for (unsigned i = 0; i < mrts; i++) {
622bf215546Sopenharmony_ci      nir_variable *out_color =
623bf215546Sopenharmony_ci         nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(),
624bf215546Sopenharmony_ci                             "color");
625bf215546Sopenharmony_ci      out_color->data.location = FRAG_RESULT_DATA0 + i;
626bf215546Sopenharmony_ci
627bf215546Sopenharmony_ci      nir_ssa_def *color = load_const(b, 4 * i, 4);
628bf215546Sopenharmony_ci      nir_store_var(b, out_color, color, 0xf);
629bf215546Sopenharmony_ci   }
630bf215546Sopenharmony_ci
631bf215546Sopenharmony_ci   return b->shader;
632bf215546Sopenharmony_ci}
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_cistatic void
635bf215546Sopenharmony_cicompile_shader(struct tu_device *dev, struct nir_shader *nir,
636bf215546Sopenharmony_ci               unsigned consts, unsigned *offset, enum global_shader idx)
637bf215546Sopenharmony_ci{
638bf215546Sopenharmony_ci   nir->options = ir3_get_compiler_options(dev->compiler);
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_ci   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
641bf215546Sopenharmony_ci   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci   ir3_finalize_nir(dev->compiler, nir);
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci   struct ir3_shader *sh =
646bf215546Sopenharmony_ci      ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) {
647bf215546Sopenharmony_ci                              .api_wavesize = IR3_SINGLE_OR_DOUBLE,
648bf215546Sopenharmony_ci                              .real_wavesize = IR3_SINGLE_OR_DOUBLE,
649bf215546Sopenharmony_ci                              .reserved_user_consts = align(consts, 4),
650bf215546Sopenharmony_ci                          }, NULL);
651bf215546Sopenharmony_ci
652bf215546Sopenharmony_ci   struct ir3_shader_key key = {};
653bf215546Sopenharmony_ci   bool created;
654bf215546Sopenharmony_ci   struct ir3_shader_variant *so =
655bf215546Sopenharmony_ci      ir3_shader_get_variant(sh, &key, false, false, &created);
656bf215546Sopenharmony_ci
657bf215546Sopenharmony_ci   struct tu6_global *global = dev->global_bo->map;
658bf215546Sopenharmony_ci
659bf215546Sopenharmony_ci   assert(*offset + so->info.sizedwords <= ARRAY_SIZE(global->shaders));
660bf215546Sopenharmony_ci   dev->global_shaders[idx] = sh;
661bf215546Sopenharmony_ci   dev->global_shader_variants[idx] = so;
662bf215546Sopenharmony_ci   memcpy(&global->shaders[*offset], so->bin,
663bf215546Sopenharmony_ci          sizeof(uint32_t) * so->info.sizedwords);
664bf215546Sopenharmony_ci   dev->global_shader_va[idx] = dev->global_bo->iova +
665bf215546Sopenharmony_ci      gb_offset(shaders[*offset]);
666bf215546Sopenharmony_ci   *offset += align(so->info.sizedwords, 32);
667bf215546Sopenharmony_ci}
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_civoid
670bf215546Sopenharmony_citu_init_clear_blit_shaders(struct tu_device *dev)
671bf215546Sopenharmony_ci{
672bf215546Sopenharmony_ci   unsigned offset = 0;
673bf215546Sopenharmony_ci   compile_shader(dev, build_blit_vs_shader(), 3, &offset, GLOBAL_SH_VS_BLIT);
674bf215546Sopenharmony_ci   compile_shader(dev, build_clear_vs_shader(), 2, &offset, GLOBAL_SH_VS_CLEAR);
675bf215546Sopenharmony_ci   compile_shader(dev, build_blit_fs_shader(false), 0, &offset, GLOBAL_SH_FS_BLIT);
676bf215546Sopenharmony_ci   compile_shader(dev, build_blit_fs_shader(true), 0, &offset, GLOBAL_SH_FS_BLIT_ZSCALE);
677bf215546Sopenharmony_ci   compile_shader(dev, build_ms_copy_fs_shader(), 0, &offset, GLOBAL_SH_FS_COPY_MS);
678bf215546Sopenharmony_ci
679bf215546Sopenharmony_ci   for (uint32_t num_rts = 0; num_rts <= MAX_RTS; num_rts++) {
680bf215546Sopenharmony_ci      compile_shader(dev, build_clear_fs_shader(num_rts), num_rts, &offset,
681bf215546Sopenharmony_ci                     GLOBAL_SH_FS_CLEAR0 + num_rts);
682bf215546Sopenharmony_ci   }
683bf215546Sopenharmony_ci}
684bf215546Sopenharmony_ci
685bf215546Sopenharmony_civoid
686bf215546Sopenharmony_citu_destroy_clear_blit_shaders(struct tu_device *dev)
687bf215546Sopenharmony_ci{
688bf215546Sopenharmony_ci   for (unsigned i = 0; i < GLOBAL_SH_COUNT; i++) {
689bf215546Sopenharmony_ci      if (dev->global_shaders[i])
690bf215546Sopenharmony_ci         ir3_shader_destroy(dev->global_shaders[i]);
691bf215546Sopenharmony_ci   }
692bf215546Sopenharmony_ci}
693bf215546Sopenharmony_ci
694bf215546Sopenharmony_cistatic void
695bf215546Sopenharmony_cir3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit,
696bf215546Sopenharmony_ci           uint32_t rts_mask, bool z_scale, VkSampleCountFlagBits samples)
697bf215546Sopenharmony_ci{
698bf215546Sopenharmony_ci   enum global_shader vs_id =
699bf215546Sopenharmony_ci      blit ? GLOBAL_SH_VS_BLIT : GLOBAL_SH_VS_CLEAR;
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci   struct ir3_shader_variant *vs = cmd->device->global_shader_variants[vs_id];
702bf215546Sopenharmony_ci   uint64_t vs_iova = cmd->device->global_shader_va[vs_id];
703bf215546Sopenharmony_ci
704bf215546Sopenharmony_ci   enum global_shader fs_id = GLOBAL_SH_FS_BLIT;
705bf215546Sopenharmony_ci
706bf215546Sopenharmony_ci   if (z_scale)
707bf215546Sopenharmony_ci      fs_id = GLOBAL_SH_FS_BLIT_ZSCALE;
708bf215546Sopenharmony_ci   else if (samples != VK_SAMPLE_COUNT_1_BIT)
709bf215546Sopenharmony_ci      fs_id = GLOBAL_SH_FS_COPY_MS;
710bf215546Sopenharmony_ci
711bf215546Sopenharmony_ci   unsigned num_rts = util_bitcount(rts_mask);
712bf215546Sopenharmony_ci   if (!blit)
713bf215546Sopenharmony_ci      fs_id = GLOBAL_SH_FS_CLEAR0 + num_rts;
714bf215546Sopenharmony_ci
715bf215546Sopenharmony_ci   struct ir3_shader_variant *fs = cmd->device->global_shader_variants[fs_id];
716bf215546Sopenharmony_ci   uint64_t fs_iova = cmd->device->global_shader_va[fs_id];
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
719bf215546Sopenharmony_ci         .vs_state = true,
720bf215546Sopenharmony_ci         .hs_state = true,
721bf215546Sopenharmony_ci         .ds_state = true,
722bf215546Sopenharmony_ci         .gs_state = true,
723bf215546Sopenharmony_ci         .fs_state = true,
724bf215546Sopenharmony_ci         .cs_state = true,
725bf215546Sopenharmony_ci         .gfx_ibo = true,
726bf215546Sopenharmony_ci         .cs_ibo = true,
727bf215546Sopenharmony_ci         .gfx_shared_const = true,
728bf215546Sopenharmony_ci         .gfx_bindless = 0x1f,
729bf215546Sopenharmony_ci         .cs_bindless = 0x1f));
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci   tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, vs);
732bf215546Sopenharmony_ci   tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL);
733bf215546Sopenharmony_ci   tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL);
734bf215546Sopenharmony_ci   tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL);
735bf215546Sopenharmony_ci   tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, fs);
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci   struct tu_pvtmem_config pvtmem = {};
738bf215546Sopenharmony_ci   tu6_emit_xs(cs, MESA_SHADER_VERTEX, vs, &pvtmem, vs_iova);
739bf215546Sopenharmony_ci   tu6_emit_xs(cs, MESA_SHADER_FRAGMENT, fs, &pvtmem, fs_iova);
740bf215546Sopenharmony_ci
741bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0());
742bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0());
743bf215546Sopenharmony_ci
744bf215546Sopenharmony_ci   if (cmd->device->physical_device->info->a6xx.has_cp_reg_write) {
745bf215546Sopenharmony_ci   /* Copy what the blob does here. This will emit an extra 0x3f
746bf215546Sopenharmony_ci    * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what
747bf215546Sopenharmony_ci    * this is working around yet.
748bf215546Sopenharmony_ci    */
749bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
750bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE));
751bf215546Sopenharmony_ci   tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL);
752bf215546Sopenharmony_ci   tu_cs_emit(cs, 0);
753bf215546Sopenharmony_ci   } else {
754bf215546Sopenharmony_ci      tu_cs_emit_regs(cs, A6XX_PC_MULTIVIEW_CNTL());
755bf215546Sopenharmony_ci   }
756bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_VFD_MULTIVIEW_CNTL());
757bf215546Sopenharmony_ci
758bf215546Sopenharmony_ci   tu6_emit_vpc(cs, vs, NULL, NULL, NULL, fs, 0);
759bf215546Sopenharmony_ci
760bf215546Sopenharmony_ci   /* REPL_MODE for varying with RECTLIST (2 vertices only) */
761bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0));
762bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0));
763bf215546Sopenharmony_ci
764bf215546Sopenharmony_ci   tu6_emit_fs_inputs(cs, fs);
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
767bf215546Sopenharmony_ci                   A6XX_GRAS_CL_CNTL(
768bf215546Sopenharmony_ci                      .persp_division_disable = 1,
769bf215546Sopenharmony_ci                      .vp_xform_disable = 1,
770bf215546Sopenharmony_ci                      .vp_clip_code_ignore = 1,
771bf215546Sopenharmony_ci                      .clip_disable = 1));
772bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?
773bf215546Sopenharmony_ci
774bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_PC_RASTER_CNTL());
775bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_VPC_UNKNOWN_9107());
776bf215546Sopenharmony_ci
777bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
778bf215546Sopenharmony_ci                   A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = 0, .y = 0),
779bf215546Sopenharmony_ci                   A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff));
780bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
781bf215546Sopenharmony_ci                   A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = 0, .y = 0),
782bf215546Sopenharmony_ci                   A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff));
783bf215546Sopenharmony_ci
784bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
785bf215546Sopenharmony_ci                   A6XX_VFD_INDEX_OFFSET(),
786bf215546Sopenharmony_ci                   A6XX_VFD_INSTANCE_START_OFFSET());
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_ci   if (rts_mask) {
789bf215546Sopenharmony_ci      unsigned rts_count = util_last_bit(rts_mask);
790bf215546Sopenharmony_ci      tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), rts_count);
791bf215546Sopenharmony_ci      unsigned rt = 0;
792bf215546Sopenharmony_ci      for (unsigned i = 0; i < rts_count; i++) {
793bf215546Sopenharmony_ci         unsigned regid = 0;
794bf215546Sopenharmony_ci         if (rts_mask & (1u << i))
795bf215546Sopenharmony_ci            regid = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + rt++);
796bf215546Sopenharmony_ci         tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(regid));
797bf215546Sopenharmony_ci      }
798bf215546Sopenharmony_ci   }
799bf215546Sopenharmony_ci
800bf215546Sopenharmony_ci   cmd->state.line_mode = RECTANGULAR;
801bf215546Sopenharmony_ci   tu6_emit_msaa(cs, samples, cmd->state.line_mode);
802bf215546Sopenharmony_ci}
803bf215546Sopenharmony_ci
804bf215546Sopenharmony_cistatic void
805bf215546Sopenharmony_cir3d_coords_raw(struct tu_cs *cs, const float *coords)
806bf215546Sopenharmony_ci{
807bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8);
808bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
809bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
810bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
811bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
812bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_NUM_UNIT(2));
813bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
814bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
815bf215546Sopenharmony_ci   tu_cs_emit_array(cs, (const uint32_t *) coords, 8);
816bf215546Sopenharmony_ci}
817bf215546Sopenharmony_ci
818bf215546Sopenharmony_ci/* z coordinate for "z scale" blit path which uses a 3d texture */
819bf215546Sopenharmony_cistatic void
820bf215546Sopenharmony_cir3d_coord_z(struct tu_cs *cs, float z)
821bf215546Sopenharmony_ci{
822bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 4);
823bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(2) |
824bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
825bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
826bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
827bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_NUM_UNIT(1));
828bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
829bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
830bf215546Sopenharmony_ci   tu_cs_emit(cs, fui(z));
831bf215546Sopenharmony_ci   tu_cs_emit(cs, 0);
832bf215546Sopenharmony_ci   tu_cs_emit(cs, 0);
833bf215546Sopenharmony_ci   tu_cs_emit(cs, 0);
834bf215546Sopenharmony_ci}
835bf215546Sopenharmony_ci
836bf215546Sopenharmony_cistatic void
837bf215546Sopenharmony_cir3d_coords(struct tu_cs *cs,
838bf215546Sopenharmony_ci           const VkOffset2D *dst,
839bf215546Sopenharmony_ci           const VkOffset2D *src,
840bf215546Sopenharmony_ci           const VkExtent2D *extent)
841bf215546Sopenharmony_ci{
842bf215546Sopenharmony_ci   int32_t src_x1 = src ? src->x : 0;
843bf215546Sopenharmony_ci   int32_t src_y1 = src ? src->y : 0;
844bf215546Sopenharmony_ci   r3d_coords_raw(cs, (float[]) {
845bf215546Sopenharmony_ci      dst->x,                 dst->y,
846bf215546Sopenharmony_ci      src_x1,                 src_y1,
847bf215546Sopenharmony_ci      dst->x + extent->width, dst->y + extent->height,
848bf215546Sopenharmony_ci      src_x1 + extent->width, src_y1 + extent->height,
849bf215546Sopenharmony_ci   });
850bf215546Sopenharmony_ci}
851bf215546Sopenharmony_ci
852bf215546Sopenharmony_cistatic void
853bf215546Sopenharmony_cir3d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val)
854bf215546Sopenharmony_ci{
855bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4);
856bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
857bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
858bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
859bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
860bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_NUM_UNIT(1));
861bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
862bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
863bf215546Sopenharmony_ci   switch (format) {
864bf215546Sopenharmony_ci   case PIPE_FORMAT_Z24X8_UNORM:
865bf215546Sopenharmony_ci   case PIPE_FORMAT_Z24_UNORM_S8_UINT: {
866bf215546Sopenharmony_ci      /* cleared as r8g8b8a8_unorm using special format */
867bf215546Sopenharmony_ci      uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
868bf215546Sopenharmony_ci      tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f));
869bf215546Sopenharmony_ci      tu_cs_emit(cs, fui((tmp >> 8 & 0xff) / 255.0f));
870bf215546Sopenharmony_ci      tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f));
871bf215546Sopenharmony_ci      tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f));
872bf215546Sopenharmony_ci   } break;
873bf215546Sopenharmony_ci   case PIPE_FORMAT_Z16_UNORM:
874bf215546Sopenharmony_ci   case PIPE_FORMAT_Z32_FLOAT:
875bf215546Sopenharmony_ci      tu_cs_emit(cs, fui(val->depthStencil.depth));
876bf215546Sopenharmony_ci      tu_cs_emit(cs, 0);
877bf215546Sopenharmony_ci      tu_cs_emit(cs, 0);
878bf215546Sopenharmony_ci      tu_cs_emit(cs, 0);
879bf215546Sopenharmony_ci      break;
880bf215546Sopenharmony_ci   case PIPE_FORMAT_S8_UINT:
881bf215546Sopenharmony_ci      tu_cs_emit(cs, val->depthStencil.stencil & 0xff);
882bf215546Sopenharmony_ci      tu_cs_emit(cs, 0);
883bf215546Sopenharmony_ci      tu_cs_emit(cs, 0);
884bf215546Sopenharmony_ci      tu_cs_emit(cs, 0);
885bf215546Sopenharmony_ci      break;
886bf215546Sopenharmony_ci   default:
887bf215546Sopenharmony_ci      /* as color formats use clear value as-is */
888bf215546Sopenharmony_ci      assert(!util_format_is_depth_or_stencil(format));
889bf215546Sopenharmony_ci      tu_cs_emit_array(cs, val->color.uint32, 4);
890bf215546Sopenharmony_ci      break;
891bf215546Sopenharmony_ci   }
892bf215546Sopenharmony_ci}
893bf215546Sopenharmony_ci
894bf215546Sopenharmony_cistatic void
895bf215546Sopenharmony_cir3d_src_common(struct tu_cmd_buffer *cmd,
896bf215546Sopenharmony_ci               struct tu_cs *cs,
897bf215546Sopenharmony_ci               const uint32_t *tex_const,
898bf215546Sopenharmony_ci               uint32_t offset_base,
899bf215546Sopenharmony_ci               uint32_t offset_ubwc,
900bf215546Sopenharmony_ci               VkFilter filter)
901bf215546Sopenharmony_ci{
902bf215546Sopenharmony_ci   struct tu_cs_memory texture = { };
903bf215546Sopenharmony_ci   VkResult result = tu_cs_alloc(&cmd->sub_cs,
904bf215546Sopenharmony_ci                                 2, /* allocate space for a sampler too */
905bf215546Sopenharmony_ci                                 A6XX_TEX_CONST_DWORDS, &texture);
906bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
907bf215546Sopenharmony_ci      cmd->record_result = result;
908bf215546Sopenharmony_ci      return;
909bf215546Sopenharmony_ci   }
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_ci   memcpy(texture.map, tex_const, A6XX_TEX_CONST_DWORDS * 4);
912bf215546Sopenharmony_ci
913bf215546Sopenharmony_ci   /* patch addresses for layer offset */
914bf215546Sopenharmony_ci   *(uint64_t*) (texture.map + 4) += offset_base;
915bf215546Sopenharmony_ci   uint64_t ubwc_addr = (texture.map[7] | (uint64_t) texture.map[8] << 32) + offset_ubwc;
916bf215546Sopenharmony_ci   texture.map[7] = ubwc_addr;
917bf215546Sopenharmony_ci   texture.map[8] = ubwc_addr >> 32;
918bf215546Sopenharmony_ci
919bf215546Sopenharmony_ci   texture.map[A6XX_TEX_CONST_DWORDS + 0] =
920bf215546Sopenharmony_ci      A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(filter, false)) |
921bf215546Sopenharmony_ci      A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(filter, false)) |
922bf215546Sopenharmony_ci      A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE) |
923bf215546Sopenharmony_ci      A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE) |
924bf215546Sopenharmony_ci      A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) |
925bf215546Sopenharmony_ci      0x60000; /* XXX used by blob, doesn't seem necessary */
926bf215546Sopenharmony_ci   texture.map[A6XX_TEX_CONST_DWORDS + 1] =
927bf215546Sopenharmony_ci      A6XX_TEX_SAMP_1_UNNORM_COORDS |
928bf215546Sopenharmony_ci      A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR;
929bf215546Sopenharmony_ci   texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0;
930bf215546Sopenharmony_ci   texture.map[A6XX_TEX_CONST_DWORDS + 3] = 0;
931bf215546Sopenharmony_ci
932bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
933bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
934bf215546Sopenharmony_ci               CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
935bf215546Sopenharmony_ci               CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
936bf215546Sopenharmony_ci               CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
937bf215546Sopenharmony_ci               CP_LOAD_STATE6_0_NUM_UNIT(1));
938bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4);
939bf215546Sopenharmony_ci
940bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_SAMP(.qword = texture.iova + A6XX_TEX_CONST_DWORDS * 4));
941bf215546Sopenharmony_ci
942bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
943bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
944bf215546Sopenharmony_ci      CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
945bf215546Sopenharmony_ci      CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
946bf215546Sopenharmony_ci      CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
947bf215546Sopenharmony_ci      CP_LOAD_STATE6_0_NUM_UNIT(1));
948bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, texture.iova);
949bf215546Sopenharmony_ci
950bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_CONST(.qword = texture.iova));
951bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(1));
952bf215546Sopenharmony_ci}
953bf215546Sopenharmony_ci
954bf215546Sopenharmony_cistatic void
955bf215546Sopenharmony_cir3d_src(struct tu_cmd_buffer *cmd,
956bf215546Sopenharmony_ci        struct tu_cs *cs,
957bf215546Sopenharmony_ci        const struct fdl6_view *iview,
958bf215546Sopenharmony_ci        uint32_t layer,
959bf215546Sopenharmony_ci        VkFilter filter,
960bf215546Sopenharmony_ci        enum pipe_format dst_format)
961bf215546Sopenharmony_ci{
962bf215546Sopenharmony_ci   uint32_t desc[A6XX_TEX_CONST_DWORDS];
963bf215546Sopenharmony_ci   memcpy(desc, iview->descriptor, sizeof(desc));
964bf215546Sopenharmony_ci
965bf215546Sopenharmony_ci   enum a6xx_format fmt = (desc[0] & A6XX_TEX_CONST_0_FMT__MASK) >>
966bf215546Sopenharmony_ci         A6XX_TEX_CONST_0_FMT__SHIFT;
967bf215546Sopenharmony_ci   enum pipe_format src_format = iview->format;
968bf215546Sopenharmony_ci   fixup_src_format(&src_format, dst_format, &fmt);
969bf215546Sopenharmony_ci   desc[0] = (desc[0] & ~A6XX_TEX_CONST_0_FMT__MASK) |
970bf215546Sopenharmony_ci      A6XX_TEX_CONST_0_FMT(fmt);
971bf215546Sopenharmony_ci
972bf215546Sopenharmony_ci   r3d_src_common(cmd, cs, desc,
973bf215546Sopenharmony_ci                  iview->layer_size * layer,
974bf215546Sopenharmony_ci                  iview->ubwc_layer_size * layer,
975bf215546Sopenharmony_ci                  filter);
976bf215546Sopenharmony_ci}
977bf215546Sopenharmony_ci
978bf215546Sopenharmony_cistatic void
979bf215546Sopenharmony_cir3d_src_buffer(struct tu_cmd_buffer *cmd,
980bf215546Sopenharmony_ci               struct tu_cs *cs,
981bf215546Sopenharmony_ci               enum pipe_format format,
982bf215546Sopenharmony_ci               uint64_t va, uint32_t pitch,
983bf215546Sopenharmony_ci               uint32_t width, uint32_t height,
984bf215546Sopenharmony_ci               enum pipe_format dst_format)
985bf215546Sopenharmony_ci{
986bf215546Sopenharmony_ci   uint32_t desc[A6XX_TEX_CONST_DWORDS];
987bf215546Sopenharmony_ci
988bf215546Sopenharmony_ci   struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR);
989bf215546Sopenharmony_ci   enum a6xx_format color_format = fmt.fmt;
990bf215546Sopenharmony_ci   fixup_src_format(&format, dst_format, &color_format);
991bf215546Sopenharmony_ci
992bf215546Sopenharmony_ci   desc[0] =
993bf215546Sopenharmony_ci      COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
994bf215546Sopenharmony_ci      A6XX_TEX_CONST_0_FMT(color_format) |
995bf215546Sopenharmony_ci      A6XX_TEX_CONST_0_SWAP(fmt.swap) |
996bf215546Sopenharmony_ci      A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
997bf215546Sopenharmony_ci      A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
998bf215546Sopenharmony_ci      A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
999bf215546Sopenharmony_ci      A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
1000bf215546Sopenharmony_ci   desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
1001bf215546Sopenharmony_ci   desc[2] =
1002bf215546Sopenharmony_ci      A6XX_TEX_CONST_2_PITCH(pitch) |
1003bf215546Sopenharmony_ci      A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
1004bf215546Sopenharmony_ci   desc[3] = 0;
1005bf215546Sopenharmony_ci   desc[4] = va;
1006bf215546Sopenharmony_ci   desc[5] = va >> 32;
1007bf215546Sopenharmony_ci   for (uint32_t i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
1008bf215546Sopenharmony_ci      desc[i] = 0;
1009bf215546Sopenharmony_ci
1010bf215546Sopenharmony_ci   r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
1011bf215546Sopenharmony_ci}
1012bf215546Sopenharmony_ci
1013bf215546Sopenharmony_cistatic void
1014bf215546Sopenharmony_cir3d_src_gmem(struct tu_cmd_buffer *cmd,
1015bf215546Sopenharmony_ci             struct tu_cs *cs,
1016bf215546Sopenharmony_ci             const struct tu_image_view *iview,
1017bf215546Sopenharmony_ci             enum pipe_format format,
1018bf215546Sopenharmony_ci             enum pipe_format dst_format,
1019bf215546Sopenharmony_ci             uint32_t gmem_offset,
1020bf215546Sopenharmony_ci             uint32_t cpp)
1021bf215546Sopenharmony_ci{
1022bf215546Sopenharmony_ci   uint32_t desc[A6XX_TEX_CONST_DWORDS];
1023bf215546Sopenharmony_ci   memcpy(desc, iview->view.descriptor, sizeof(desc));
1024bf215546Sopenharmony_ci
1025bf215546Sopenharmony_ci   enum a6xx_format fmt = tu6_format_texture(format, TILE6_LINEAR).fmt;
1026bf215546Sopenharmony_ci   fixup_src_format(&format, dst_format, &fmt);
1027bf215546Sopenharmony_ci
1028bf215546Sopenharmony_ci   /* patch the format so that depth/stencil get the right format and swizzle */
1029bf215546Sopenharmony_ci   desc[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK |
1030bf215546Sopenharmony_ci                A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK |
1031bf215546Sopenharmony_ci                A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK);
1032bf215546Sopenharmony_ci   desc[0] |= A6XX_TEX_CONST_0_FMT(fmt) |
1033bf215546Sopenharmony_ci               A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
1034bf215546Sopenharmony_ci               A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) |
1035bf215546Sopenharmony_ci               A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) |
1036bf215546Sopenharmony_ci               A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W);
1037bf215546Sopenharmony_ci
1038bf215546Sopenharmony_ci   /* patched for gmem */
1039bf215546Sopenharmony_ci   desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK);
1040bf215546Sopenharmony_ci   desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2);
1041bf215546Sopenharmony_ci   desc[2] =
1042bf215546Sopenharmony_ci      A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) |
1043bf215546Sopenharmony_ci      A6XX_TEX_CONST_2_PITCH(cmd->state.tiling->tile0.width * cpp);
1044bf215546Sopenharmony_ci   desc[3] = 0;
1045bf215546Sopenharmony_ci   desc[4] = cmd->device->physical_device->gmem_base + gmem_offset;
1046bf215546Sopenharmony_ci   desc[5] = A6XX_TEX_CONST_5_DEPTH(1);
1047bf215546Sopenharmony_ci   for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
1048bf215546Sopenharmony_ci      desc[i] = 0;
1049bf215546Sopenharmony_ci
1050bf215546Sopenharmony_ci   r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
1051bf215546Sopenharmony_ci}
1052bf215546Sopenharmony_ci
1053bf215546Sopenharmony_cistatic void
1054bf215546Sopenharmony_cir3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
1055bf215546Sopenharmony_ci        enum pipe_format src_format)
1056bf215546Sopenharmony_ci{
1057bf215546Sopenharmony_ci   uint32_t mrt_buf_info = iview->RB_MRT_BUF_INFO;
1058bf215546Sopenharmony_ci
1059bf215546Sopenharmony_ci   enum a6xx_format fmt = mrt_buf_info & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK;
1060bf215546Sopenharmony_ci   enum pipe_format dst_format = iview->format;
1061bf215546Sopenharmony_ci   fixup_dst_format(src_format, &dst_format, &fmt);
1062bf215546Sopenharmony_ci   mrt_buf_info =
1063bf215546Sopenharmony_ci      (mrt_buf_info & ~A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK) |
1064bf215546Sopenharmony_ci      A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(fmt);
1065bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
1066bf215546Sopenharmony_ci   tu_cs_emit(cs, mrt_buf_info);
1067bf215546Sopenharmony_ci   tu_cs_image_ref(cs, iview, layer);
1068bf215546Sopenharmony_ci   tu_cs_emit(cs, 0);
1069bf215546Sopenharmony_ci
1070bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
1071bf215546Sopenharmony_ci   tu_cs_image_flag_ref(cs, iview, layer);
1072bf215546Sopenharmony_ci
1073bf215546Sopenharmony_ci   /* Use color format from RB_MRT_BUF_INFO. This register is relevant for
1074bf215546Sopenharmony_ci    * FMT6_NV12_Y.
1075bf215546Sopenharmony_ci    */
1076bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = fmt));
1077bf215546Sopenharmony_ci
1078bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
1079bf215546Sopenharmony_ci}
1080bf215546Sopenharmony_ci
1081bf215546Sopenharmony_cistatic void
1082bf215546Sopenharmony_cir3d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
1083bf215546Sopenharmony_ci{
1084bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
1085bf215546Sopenharmony_ci   tu_cs_emit(cs, tu_image_view_depth(iview, RB_MRT_BUF_INFO));
1086bf215546Sopenharmony_ci   tu_cs_image_depth_ref(cs, iview, layer);
1087bf215546Sopenharmony_ci   tu_cs_emit(cs, 0);
1088bf215546Sopenharmony_ci
1089bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
1090bf215546Sopenharmony_ci   tu_cs_image_flag_ref(cs, &iview->view, layer);
1091bf215546Sopenharmony_ci
1092bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->view.ubwc_enabled));
1093bf215546Sopenharmony_ci}
1094bf215546Sopenharmony_ci
1095bf215546Sopenharmony_cistatic void
1096bf215546Sopenharmony_cir3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
1097bf215546Sopenharmony_ci{
1098bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
1099bf215546Sopenharmony_ci   tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO));
1100bf215546Sopenharmony_ci   tu_cs_image_stencil_ref(cs, iview, layer);
1101bf215546Sopenharmony_ci   tu_cs_emit(cs, 0);
1102bf215546Sopenharmony_ci
1103bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
1104bf215546Sopenharmony_ci}
1105bf215546Sopenharmony_ci
1106bf215546Sopenharmony_cistatic void
1107bf215546Sopenharmony_cir3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch,
1108bf215546Sopenharmony_ci               enum pipe_format src_format)
1109bf215546Sopenharmony_ci{
1110bf215546Sopenharmony_ci   struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR);
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci   enum a6xx_format color_fmt = fmt.fmt;
1113bf215546Sopenharmony_ci   fixup_dst_format(src_format, &format, &color_fmt);
1114bf215546Sopenharmony_ci
1115bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
1116bf215546Sopenharmony_ci                   A6XX_RB_MRT_BUF_INFO(0, .color_format = color_fmt, .color_swap = fmt.swap),
1117bf215546Sopenharmony_ci                   A6XX_RB_MRT_PITCH(0, pitch),
1118bf215546Sopenharmony_ci                   A6XX_RB_MRT_ARRAY_PITCH(0, 0),
1119bf215546Sopenharmony_ci                   A6XX_RB_MRT_BASE(0, .qword = va),
1120bf215546Sopenharmony_ci                   A6XX_RB_MRT_BASE_GMEM(0, 0));
1121bf215546Sopenharmony_ci
1122bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
1123bf215546Sopenharmony_ci}
1124bf215546Sopenharmony_ci
1125bf215546Sopenharmony_cistatic uint8_t
1126bf215546Sopenharmony_ciaspect_write_mask(enum pipe_format format, VkImageAspectFlags aspect_mask)
1127bf215546Sopenharmony_ci{
1128bf215546Sopenharmony_ci   uint8_t mask = 0xf;
1129bf215546Sopenharmony_ci   assert(aspect_mask);
1130bf215546Sopenharmony_ci   /* note: the only format with partial writing is D24S8,
1131bf215546Sopenharmony_ci    * clear/blit uses the _AS_R8G8B8A8 format to access it
1132bf215546Sopenharmony_ci    */
1133bf215546Sopenharmony_ci   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
1134bf215546Sopenharmony_ci      if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
1135bf215546Sopenharmony_ci         mask = 0x7;
1136bf215546Sopenharmony_ci      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
1137bf215546Sopenharmony_ci         mask = 0x8;
1138bf215546Sopenharmony_ci   }
1139bf215546Sopenharmony_ci   return mask;
1140bf215546Sopenharmony_ci}
1141bf215546Sopenharmony_ci
1142bf215546Sopenharmony_cistatic void
1143bf215546Sopenharmony_cir3d_setup(struct tu_cmd_buffer *cmd,
1144bf215546Sopenharmony_ci          struct tu_cs *cs,
1145bf215546Sopenharmony_ci          enum pipe_format src_format,
1146bf215546Sopenharmony_ci          enum pipe_format dst_format,
1147bf215546Sopenharmony_ci          VkImageAspectFlags aspect_mask,
1148bf215546Sopenharmony_ci          unsigned blit_param,
1149bf215546Sopenharmony_ci          bool clear,
1150bf215546Sopenharmony_ci          bool ubwc,
1151bf215546Sopenharmony_ci          VkSampleCountFlagBits samples)
1152bf215546Sopenharmony_ci{
1153bf215546Sopenharmony_ci   enum a6xx_format fmt = tu6_base_format(dst_format);
1154bf215546Sopenharmony_ci   fixup_dst_format(src_format, &dst_format, &fmt);
1155bf215546Sopenharmony_ci
1156bf215546Sopenharmony_ci   if ((dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
1157bf215546Sopenharmony_ci        dst_format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) {
1158bf215546Sopenharmony_ci      fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
1159bf215546Sopenharmony_ci   }
1160bf215546Sopenharmony_ci
1161bf215546Sopenharmony_ci   if (!cmd->state.pass) {
1162bf215546Sopenharmony_ci      tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
1163bf215546Sopenharmony_ci      tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff);
1164bf215546Sopenharmony_ci   }
1165bf215546Sopenharmony_ci
1166bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000));
1167bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000));
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_ci   r3d_common(cmd, cs, !clear, 1, blit_param, samples);
1170bf215546Sopenharmony_ci
1171bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
1172bf215546Sopenharmony_ci   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
1173bf215546Sopenharmony_ci                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
1174bf215546Sopenharmony_ci                  0xfc000000);
1175bf215546Sopenharmony_ci   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1));
1176bf215546Sopenharmony_ci
1177bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
1178bf215546Sopenharmony_ci                   A6XX_RB_FS_OUTPUT_CNTL0(),
1179bf215546Sopenharmony_ci                   A6XX_RB_FS_OUTPUT_CNTL1(.mrt = 1));
1180bf215546Sopenharmony_ci
1181bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
1182bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.sample_mask = 0xffff));
1183bf215546Sopenharmony_ci
1184bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
1185bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL());
1186bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
1187bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL());
1188bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK());
1189bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK());
1190bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_STENCILREF());
1191bf215546Sopenharmony_ci
1192bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_RENDER_COMPONENTS(.rt0 = 0xf));
1193bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf));
1194bf215546Sopenharmony_ci
1195bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0,
1196bf215546Sopenharmony_ci                        .color_format = fmt,
1197bf215546Sopenharmony_ci                        .color_sint = util_format_is_pure_sint(dst_format),
1198bf215546Sopenharmony_ci                        .color_uint = util_format_is_pure_uint(dst_format)));
1199bf215546Sopenharmony_ci
1200bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0,
1201bf215546Sopenharmony_ci      .component_enable = aspect_write_mask(dst_format, aspect_mask)));
1202bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(util_format_is_srgb(dst_format)));
1203bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(util_format_is_srgb(dst_format)));
1204bf215546Sopenharmony_ci
1205bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0));
1206bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0));
1207bf215546Sopenharmony_ci
1208bf215546Sopenharmony_ci   tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL,
1209bf215546Sopenharmony_ci                        A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2));
1210bf215546Sopenharmony_ci
1211bf215546Sopenharmony_ci   /* Disable sample counting in order to not affect occlusion query. */
1212bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = true));
1213bf215546Sopenharmony_ci
1214bf215546Sopenharmony_ci   if (cmd->state.prim_generated_query_running_before_rp) {
1215bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, STOP_PRIMITIVE_CTRS);
1216bf215546Sopenharmony_ci   }
1217bf215546Sopenharmony_ci
1218bf215546Sopenharmony_ci   if (cmd->state.predication_active) {
1219bf215546Sopenharmony_ci      tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
1220bf215546Sopenharmony_ci      tu_cs_emit(cs, 0);
1221bf215546Sopenharmony_ci   }
1222bf215546Sopenharmony_ci}
1223bf215546Sopenharmony_ci
1224bf215546Sopenharmony_cistatic void
1225bf215546Sopenharmony_cir3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1226bf215546Sopenharmony_ci{
1227bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
1228bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) |
1229bf215546Sopenharmony_ci                  CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
1230bf215546Sopenharmony_ci                  CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY));
1231bf215546Sopenharmony_ci   tu_cs_emit(cs, 1); /* instance count */
1232bf215546Sopenharmony_ci   tu_cs_emit(cs, 2); /* vertex count */
1233bf215546Sopenharmony_ci}
1234bf215546Sopenharmony_ci
1235bf215546Sopenharmony_cistatic void
1236bf215546Sopenharmony_cir3d_run_vis(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1237bf215546Sopenharmony_ci{
1238bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
1239bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) |
1240bf215546Sopenharmony_ci                  CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
1241bf215546Sopenharmony_ci                  CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY));
1242bf215546Sopenharmony_ci   tu_cs_emit(cs, 1); /* instance count */
1243bf215546Sopenharmony_ci   tu_cs_emit(cs, 2); /* vertex count */
1244bf215546Sopenharmony_ci}
1245bf215546Sopenharmony_ci
1246bf215546Sopenharmony_cistatic void
1247bf215546Sopenharmony_cir3d_teardown(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
1248bf215546Sopenharmony_ci{
1249bf215546Sopenharmony_ci   if (cmd->state.predication_active) {
1250bf215546Sopenharmony_ci      tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
1251bf215546Sopenharmony_ci      tu_cs_emit(cs, 1);
1252bf215546Sopenharmony_ci   }
1253bf215546Sopenharmony_ci
1254bf215546Sopenharmony_ci   /* Re-enable sample counting. */
1255bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = false));
1256bf215546Sopenharmony_ci
1257bf215546Sopenharmony_ci   if (cmd->state.prim_generated_query_running_before_rp) {
1258bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, START_PRIMITIVE_CTRS);
1259bf215546Sopenharmony_ci   }
1260bf215546Sopenharmony_ci}
1261bf215546Sopenharmony_ci
1262bf215546Sopenharmony_ci/* blit ops - common interface for 2d/shader paths */
1263bf215546Sopenharmony_ci
1264bf215546Sopenharmony_cistruct blit_ops {
1265bf215546Sopenharmony_ci   void (*coords)(struct tu_cs *cs,
1266bf215546Sopenharmony_ci                  const VkOffset2D *dst,
1267bf215546Sopenharmony_ci                  const VkOffset2D *src,
1268bf215546Sopenharmony_ci                  const VkExtent2D *extent);
1269bf215546Sopenharmony_ci   void (*clear_value)(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val);
1270bf215546Sopenharmony_ci   void (*src)(
1271bf215546Sopenharmony_ci        struct tu_cmd_buffer *cmd,
1272bf215546Sopenharmony_ci        struct tu_cs *cs,
1273bf215546Sopenharmony_ci        const struct fdl6_view *iview,
1274bf215546Sopenharmony_ci        uint32_t layer,
1275bf215546Sopenharmony_ci        VkFilter filter,
1276bf215546Sopenharmony_ci        enum pipe_format dst_format);
1277bf215546Sopenharmony_ci   void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
1278bf215546Sopenharmony_ci                      enum pipe_format format,
1279bf215546Sopenharmony_ci                      uint64_t va, uint32_t pitch,
1280bf215546Sopenharmony_ci                      uint32_t width, uint32_t height,
1281bf215546Sopenharmony_ci                      enum pipe_format dst_format);
1282bf215546Sopenharmony_ci   void (*dst)(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer,
1283bf215546Sopenharmony_ci               enum pipe_format src_format);
1284bf215546Sopenharmony_ci   void (*dst_depth)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
1285bf215546Sopenharmony_ci   void (*dst_stencil)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
1286bf215546Sopenharmony_ci   void (*dst_buffer)(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch,
1287bf215546Sopenharmony_ci                      enum pipe_format src_format);
1288bf215546Sopenharmony_ci   void (*setup)(struct tu_cmd_buffer *cmd,
1289bf215546Sopenharmony_ci                 struct tu_cs *cs,
1290bf215546Sopenharmony_ci                 enum pipe_format src_format,
1291bf215546Sopenharmony_ci                 enum pipe_format dst_format,
1292bf215546Sopenharmony_ci                 VkImageAspectFlags aspect_mask,
1293bf215546Sopenharmony_ci                 unsigned blit_param, /* CmdBlitImage: rotation in 2D path and z scaling in 3D path */
1294bf215546Sopenharmony_ci                 bool clear,
1295bf215546Sopenharmony_ci                 bool ubwc,
1296bf215546Sopenharmony_ci                 VkSampleCountFlagBits samples);
1297bf215546Sopenharmony_ci   void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
1298bf215546Sopenharmony_ci   void (*teardown)(struct tu_cmd_buffer *cmd,
1299bf215546Sopenharmony_ci                    struct tu_cs *cs);
1300bf215546Sopenharmony_ci};
1301bf215546Sopenharmony_ci
1302bf215546Sopenharmony_cistatic const struct blit_ops r2d_ops = {
1303bf215546Sopenharmony_ci   .coords = r2d_coords,
1304bf215546Sopenharmony_ci   .clear_value = r2d_clear_value,
1305bf215546Sopenharmony_ci   .src = r2d_src,
1306bf215546Sopenharmony_ci   .src_buffer = r2d_src_buffer,
1307bf215546Sopenharmony_ci   .dst = r2d_dst,
1308bf215546Sopenharmony_ci   .dst_depth = r2d_dst_depth,
1309bf215546Sopenharmony_ci   .dst_stencil = r2d_dst_stencil,
1310bf215546Sopenharmony_ci   .dst_buffer = r2d_dst_buffer,
1311bf215546Sopenharmony_ci   .setup = r2d_setup,
1312bf215546Sopenharmony_ci   .run = r2d_run,
1313bf215546Sopenharmony_ci   .teardown = r2d_teardown,
1314bf215546Sopenharmony_ci};
1315bf215546Sopenharmony_ci
1316bf215546Sopenharmony_cistatic const struct blit_ops r3d_ops = {
1317bf215546Sopenharmony_ci   .coords = r3d_coords,
1318bf215546Sopenharmony_ci   .clear_value = r3d_clear_value,
1319bf215546Sopenharmony_ci   .src = r3d_src,
1320bf215546Sopenharmony_ci   .src_buffer = r3d_src_buffer,
1321bf215546Sopenharmony_ci   .dst = r3d_dst,
1322bf215546Sopenharmony_ci   .dst_depth = r3d_dst_depth,
1323bf215546Sopenharmony_ci   .dst_stencil = r3d_dst_stencil,
1324bf215546Sopenharmony_ci   .dst_buffer = r3d_dst_buffer,
1325bf215546Sopenharmony_ci   .setup = r3d_setup,
1326bf215546Sopenharmony_ci   .run = r3d_run,
1327bf215546Sopenharmony_ci   .teardown = r3d_teardown,
1328bf215546Sopenharmony_ci};
1329bf215546Sopenharmony_ci
1330bf215546Sopenharmony_ci/* passthrough set coords from 3D extents */
1331bf215546Sopenharmony_cistatic void
1332bf215546Sopenharmony_cicoords(const struct blit_ops *ops,
1333bf215546Sopenharmony_ci       struct tu_cs *cs,
1334bf215546Sopenharmony_ci       const VkOffset3D *dst,
1335bf215546Sopenharmony_ci       const VkOffset3D *src,
1336bf215546Sopenharmony_ci       const VkExtent3D *extent)
1337bf215546Sopenharmony_ci{
1338bf215546Sopenharmony_ci   ops->coords(cs, (const VkOffset2D*) dst, (const VkOffset2D*) src, (const VkExtent2D*) extent);
1339bf215546Sopenharmony_ci}
1340bf215546Sopenharmony_ci
1341bf215546Sopenharmony_ci/* Decides the VK format to treat our data as for a memcpy-style blit. We have
1342bf215546Sopenharmony_ci * to be a bit careful because we have to pick a format with matching UBWC
1343bf215546Sopenharmony_ci * compression behavior, so no just returning R8_UINT/R16_UINT/R32_UINT for
1344bf215546Sopenharmony_ci * everything.
1345bf215546Sopenharmony_ci */
1346bf215546Sopenharmony_cistatic enum pipe_format
1347bf215546Sopenharmony_cicopy_format(VkFormat vk_format, VkImageAspectFlags aspect_mask)
1348bf215546Sopenharmony_ci{
1349bf215546Sopenharmony_ci   if (vk_format_is_compressed(vk_format)) {
1350bf215546Sopenharmony_ci      switch (vk_format_get_blocksize(vk_format)) {
1351bf215546Sopenharmony_ci      case 1: return PIPE_FORMAT_R8_UINT;
1352bf215546Sopenharmony_ci      case 2: return PIPE_FORMAT_R16_UINT;
1353bf215546Sopenharmony_ci      case 4: return PIPE_FORMAT_R32_UINT;
1354bf215546Sopenharmony_ci      case 8: return PIPE_FORMAT_R32G32_UINT;
1355bf215546Sopenharmony_ci      case 16:return PIPE_FORMAT_R32G32B32A32_UINT;
1356bf215546Sopenharmony_ci      default:
1357bf215546Sopenharmony_ci         unreachable("unhandled format size");
1358bf215546Sopenharmony_ci      }
1359bf215546Sopenharmony_ci   }
1360bf215546Sopenharmony_ci
1361bf215546Sopenharmony_ci   enum pipe_format format = tu_vk_format_to_pipe_format(vk_format);
1362bf215546Sopenharmony_ci
1363bf215546Sopenharmony_ci   /* For SNORM formats, copy them as the equivalent UNORM format.  If we treat
1364bf215546Sopenharmony_ci    * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81
1365bf215546Sopenharmony_ci    * (also -1.0), when we're supposed to be memcpying the bits. See
1366bf215546Sopenharmony_ci    * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
1367bf215546Sopenharmony_ci    */
1368bf215546Sopenharmony_ci   format = util_format_snorm_to_unorm(format);
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_ci   switch (format) {
1371bf215546Sopenharmony_ci   case PIPE_FORMAT_R9G9B9E5_FLOAT:
1372bf215546Sopenharmony_ci      return PIPE_FORMAT_R32_UINT;
1373bf215546Sopenharmony_ci
1374bf215546Sopenharmony_ci   case PIPE_FORMAT_G8_B8R8_420_UNORM:
1375bf215546Sopenharmony_ci      if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT)
1376bf215546Sopenharmony_ci         return PIPE_FORMAT_R8G8_UNORM;
1377bf215546Sopenharmony_ci      else
1378bf215546Sopenharmony_ci         return PIPE_FORMAT_Y8_UNORM;
1379bf215546Sopenharmony_ci   case PIPE_FORMAT_G8_B8_R8_420_UNORM:
1380bf215546Sopenharmony_ci      return PIPE_FORMAT_R8_UNORM;
1381bf215546Sopenharmony_ci
1382bf215546Sopenharmony_ci   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
1383bf215546Sopenharmony_ci      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
1384bf215546Sopenharmony_ci         return PIPE_FORMAT_S8_UINT;
1385bf215546Sopenharmony_ci      assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT);
1386bf215546Sopenharmony_ci      return PIPE_FORMAT_Z32_FLOAT;
1387bf215546Sopenharmony_ci
1388bf215546Sopenharmony_ci   default:
1389bf215546Sopenharmony_ci      return format;
1390bf215546Sopenharmony_ci   }
1391bf215546Sopenharmony_ci}
1392bf215546Sopenharmony_ci
1393bf215546Sopenharmony_civoid
1394bf215546Sopenharmony_citu6_clear_lrz(struct tu_cmd_buffer *cmd,
1395bf215546Sopenharmony_ci              struct tu_cs *cs,
1396bf215546Sopenharmony_ci              struct tu_image *image,
1397bf215546Sopenharmony_ci              const VkClearValue *value)
1398bf215546Sopenharmony_ci{
1399bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
1400bf215546Sopenharmony_ci
1401bf215546Sopenharmony_ci   /* It is assumed that LRZ cache is invalidated at this point for
1402bf215546Sopenharmony_ci    * the writes here to become visible to LRZ.
1403bf215546Sopenharmony_ci    *
1404bf215546Sopenharmony_ci    * LRZ writes are going through UCHE cache, flush UCHE before changing
1405bf215546Sopenharmony_ci    * LRZ via CCU. Don't need to invalidate CCU since we are presumably
1406bf215546Sopenharmony_ci    * writing whole cache lines we assume to be 64 bytes.
1407bf215546Sopenharmony_ci    */
1408bf215546Sopenharmony_ci   tu6_emit_event_write(cmd, &cmd->cs, CACHE_FLUSH_TS);
1409bf215546Sopenharmony_ci
1410bf215546Sopenharmony_ci   ops->setup(cmd, cs, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_Z16_UNORM,
1411bf215546Sopenharmony_ci              VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false,
1412bf215546Sopenharmony_ci              VK_SAMPLE_COUNT_1_BIT);
1413bf215546Sopenharmony_ci   ops->clear_value(cs, PIPE_FORMAT_Z16_UNORM, value);
1414bf215546Sopenharmony_ci   ops->dst_buffer(cs, PIPE_FORMAT_Z16_UNORM,
1415bf215546Sopenharmony_ci                   image->iova + image->lrz_offset,
1416bf215546Sopenharmony_ci                   image->lrz_pitch * 2, PIPE_FORMAT_Z16_UNORM);
1417bf215546Sopenharmony_ci   ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {image->lrz_pitch, image->lrz_height});
1418bf215546Sopenharmony_ci   ops->run(cmd, cs);
1419bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
1420bf215546Sopenharmony_ci
1421bf215546Sopenharmony_ci   /* Clearing writes via CCU color in the PS stage, and LRZ is read via
1422bf215546Sopenharmony_ci    * UCHE in the earlier GRAS stage.
1423bf215546Sopenharmony_ci    */
1424bf215546Sopenharmony_ci   cmd->state.cache.flush_bits |=
1425bf215546Sopenharmony_ci      TU_CMD_FLAG_CCU_FLUSH_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE |
1426bf215546Sopenharmony_ci      TU_CMD_FLAG_WAIT_FOR_IDLE;
1427bf215546Sopenharmony_ci}
1428bf215546Sopenharmony_ci
1429bf215546Sopenharmony_civoid
1430bf215546Sopenharmony_citu6_dirty_lrz_fc(struct tu_cmd_buffer *cmd,
1431bf215546Sopenharmony_ci                 struct tu_cs *cs,
1432bf215546Sopenharmony_ci                 struct tu_image *image)
1433bf215546Sopenharmony_ci{
1434bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
1435bf215546Sopenharmony_ci   VkClearValue clear = { .color = { .uint32[0] = 0xffffffff } };
1436bf215546Sopenharmony_ci
1437bf215546Sopenharmony_ci   /* LRZ fast-clear buffer is always allocated with 512 bytes size. */
1438bf215546Sopenharmony_ci   ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, PIPE_FORMAT_R32_UINT,
1439bf215546Sopenharmony_ci              VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false,
1440bf215546Sopenharmony_ci              VK_SAMPLE_COUNT_1_BIT);
1441bf215546Sopenharmony_ci   ops->clear_value(cs, PIPE_FORMAT_R32_UINT, &clear);
1442bf215546Sopenharmony_ci   ops->dst_buffer(cs, PIPE_FORMAT_R32_UINT,
1443bf215546Sopenharmony_ci                   image->iova + image->lrz_fc_offset, 512,
1444bf215546Sopenharmony_ci                   PIPE_FORMAT_R32_UINT);
1445bf215546Sopenharmony_ci   ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {128, 1});
1446bf215546Sopenharmony_ci   ops->run(cmd, cs);
1447bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
1448bf215546Sopenharmony_ci}
1449bf215546Sopenharmony_ci
1450bf215546Sopenharmony_cistatic void
1451bf215546Sopenharmony_citu_image_view_copy_blit(struct fdl6_view *iview,
1452bf215546Sopenharmony_ci                        struct tu_image *image,
1453bf215546Sopenharmony_ci                        enum pipe_format format,
1454bf215546Sopenharmony_ci                        const VkImageSubresourceLayers *subres,
1455bf215546Sopenharmony_ci                        uint32_t layer,
1456bf215546Sopenharmony_ci                        bool z_scale)
1457bf215546Sopenharmony_ci{
1458bf215546Sopenharmony_ci   VkImageAspectFlags aspect_mask = subres->aspectMask;
1459bf215546Sopenharmony_ci
1460bf215546Sopenharmony_ci   /* always use the AS_R8G8B8A8 format for these */
1461bf215546Sopenharmony_ci   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT ||
1462bf215546Sopenharmony_ci       format == PIPE_FORMAT_Z24X8_UNORM) {
1463bf215546Sopenharmony_ci      aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
1464bf215546Sopenharmony_ci   }
1465bf215546Sopenharmony_ci
1466bf215546Sopenharmony_ci   const struct fdl_layout *layout =
1467bf215546Sopenharmony_ci      &image->layout[tu6_plane_index(image->vk.format, aspect_mask)];
1468bf215546Sopenharmony_ci
1469bf215546Sopenharmony_ci   fdl6_view_init(iview, &layout, &(struct fdl_view_args) {
1470bf215546Sopenharmony_ci      .iova = image->iova,
1471bf215546Sopenharmony_ci      .base_array_layer = subres->baseArrayLayer + layer,
1472bf215546Sopenharmony_ci      .layer_count = 1,
1473bf215546Sopenharmony_ci      .base_miplevel = subres->mipLevel,
1474bf215546Sopenharmony_ci      .level_count = 1,
1475bf215546Sopenharmony_ci      .format = tu_format_for_aspect(format, aspect_mask),
1476bf215546Sopenharmony_ci      .swiz = {
1477bf215546Sopenharmony_ci         PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W
1478bf215546Sopenharmony_ci      },
1479bf215546Sopenharmony_ci      .type = z_scale ? FDL_VIEW_TYPE_3D : FDL_VIEW_TYPE_2D,
1480bf215546Sopenharmony_ci   }, false);
1481bf215546Sopenharmony_ci}
1482bf215546Sopenharmony_ci
1483bf215546Sopenharmony_cistatic void
1484bf215546Sopenharmony_citu_image_view_copy(struct fdl6_view *iview,
1485bf215546Sopenharmony_ci                   struct tu_image *image,
1486bf215546Sopenharmony_ci                   enum pipe_format format,
1487bf215546Sopenharmony_ci                   const VkImageSubresourceLayers *subres,
1488bf215546Sopenharmony_ci                   uint32_t layer)
1489bf215546Sopenharmony_ci{
1490bf215546Sopenharmony_ci   tu_image_view_copy_blit(iview, image, format, subres, layer, false);
1491bf215546Sopenharmony_ci}
1492bf215546Sopenharmony_ci
1493bf215546Sopenharmony_cistatic void
1494bf215546Sopenharmony_citu_image_view_blit(struct fdl6_view *iview,
1495bf215546Sopenharmony_ci                   struct tu_image *image,
1496bf215546Sopenharmony_ci                   const VkImageSubresourceLayers *subres,
1497bf215546Sopenharmony_ci                   uint32_t layer)
1498bf215546Sopenharmony_ci{
1499bf215546Sopenharmony_ci   enum pipe_format format =
1500bf215546Sopenharmony_ci      tu6_plane_format(image->vk.format, tu6_plane_index(image->vk.format,
1501bf215546Sopenharmony_ci                                                         subres->aspectMask));
1502bf215546Sopenharmony_ci   tu_image_view_copy_blit(iview, image, format, subres, layer, false);
1503bf215546Sopenharmony_ci}
1504bf215546Sopenharmony_ci
1505bf215546Sopenharmony_cistatic void
1506bf215546Sopenharmony_citu6_blit_image(struct tu_cmd_buffer *cmd,
1507bf215546Sopenharmony_ci               struct tu_image *src_image,
1508bf215546Sopenharmony_ci               struct tu_image *dst_image,
1509bf215546Sopenharmony_ci               const VkImageBlit2 *info,
1510bf215546Sopenharmony_ci               VkFilter filter)
1511bf215546Sopenharmony_ci{
1512bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
1513bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->cs;
1514bf215546Sopenharmony_ci   bool z_scale = false;
1515bf215546Sopenharmony_ci   uint32_t layers = info->dstOffsets[1].z - info->dstOffsets[0].z;
1516bf215546Sopenharmony_ci
1517bf215546Sopenharmony_ci   /* 2D blit can't do rotation mirroring from just coordinates */
1518bf215546Sopenharmony_ci   static const enum a6xx_rotation rotate[2][2] = {
1519bf215546Sopenharmony_ci      {ROTATE_0, ROTATE_HFLIP},
1520bf215546Sopenharmony_ci      {ROTATE_VFLIP, ROTATE_180},
1521bf215546Sopenharmony_ci   };
1522bf215546Sopenharmony_ci
1523bf215546Sopenharmony_ci   bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
1524bf215546Sopenharmony_ci                   (info->dstOffsets[1].x < info->dstOffsets[0].x);
1525bf215546Sopenharmony_ci   bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
1526bf215546Sopenharmony_ci                   (info->dstOffsets[1].y < info->dstOffsets[0].y);
1527bf215546Sopenharmony_ci
1528bf215546Sopenharmony_ci   int32_t src0_z = info->srcOffsets[0].z;
1529bf215546Sopenharmony_ci   int32_t src1_z = info->srcOffsets[1].z;
1530bf215546Sopenharmony_ci
1531bf215546Sopenharmony_ci   if ((info->srcOffsets[1].z - info->srcOffsets[0].z !=
1532bf215546Sopenharmony_ci        info->dstOffsets[1].z - info->dstOffsets[0].z) ||
1533bf215546Sopenharmony_ci       info->srcOffsets[1].z < info->srcOffsets[0].z) {
1534bf215546Sopenharmony_ci      z_scale = true;
1535bf215546Sopenharmony_ci   }
1536bf215546Sopenharmony_ci
1537bf215546Sopenharmony_ci   if (info->dstOffsets[1].z < info->dstOffsets[0].z) {
1538bf215546Sopenharmony_ci      layers = info->dstOffsets[0].z - info->dstOffsets[1].z;
1539bf215546Sopenharmony_ci      src0_z = info->srcOffsets[1].z;
1540bf215546Sopenharmony_ci      src1_z = info->srcOffsets[0].z;
1541bf215546Sopenharmony_ci   }
1542bf215546Sopenharmony_ci
1543bf215546Sopenharmony_ci   if (info->dstSubresource.layerCount > 1) {
1544bf215546Sopenharmony_ci      assert(layers <= 1);
1545bf215546Sopenharmony_ci      layers = info->dstSubresource.layerCount;
1546bf215546Sopenharmony_ci   }
1547bf215546Sopenharmony_ci
   /* BC1_RGB_* formats need to have their last components overridden with 1
1549bf215546Sopenharmony_ci    * when sampling, which is normally handled with the texture descriptor
1550bf215546Sopenharmony_ci    * swizzle. The 2d path can't handle that, so use the 3d path.
1551bf215546Sopenharmony_ci    *
1552bf215546Sopenharmony_ci    * TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with
1553bf215546Sopenharmony_ci    * the 2d path.
1554bf215546Sopenharmony_ci    */
1555bf215546Sopenharmony_ci
1556bf215546Sopenharmony_ci   unsigned blit_param = rotate[mirror_y][mirror_x];
1557bf215546Sopenharmony_ci   if (dst_image->layout[0].nr_samples > 1 ||
1558bf215546Sopenharmony_ci       src_image->vk.format == VK_FORMAT_BC1_RGB_UNORM_BLOCK ||
1559bf215546Sopenharmony_ci       src_image->vk.format == VK_FORMAT_BC1_RGB_SRGB_BLOCK ||
1560bf215546Sopenharmony_ci       filter == VK_FILTER_CUBIC_EXT ||
1561bf215546Sopenharmony_ci       z_scale) {
1562bf215546Sopenharmony_ci      ops = &r3d_ops;
1563bf215546Sopenharmony_ci      blit_param = z_scale;
1564bf215546Sopenharmony_ci   }
1565bf215546Sopenharmony_ci
1566bf215546Sopenharmony_ci   /* use the right format in setup() for D32_S8
1567bf215546Sopenharmony_ci    * TODO: this probably should use a helper
1568bf215546Sopenharmony_ci    */
1569bf215546Sopenharmony_ci   enum pipe_format src_format =
1570bf215546Sopenharmony_ci      tu6_plane_format(src_image->vk.format,
1571bf215546Sopenharmony_ci                       tu6_plane_index(src_image->vk.format,
1572bf215546Sopenharmony_ci                                       info->srcSubresource.aspectMask));
1573bf215546Sopenharmony_ci   enum pipe_format dst_format =
1574bf215546Sopenharmony_ci      tu6_plane_format(dst_image->vk.format,
1575bf215546Sopenharmony_ci                       tu6_plane_index(src_image->vk.format,
1576bf215546Sopenharmony_ci                                       info->srcSubresource.aspectMask));
1577bf215546Sopenharmony_ci   trace_start_blit(&cmd->trace, cs);
1578bf215546Sopenharmony_ci
1579bf215546Sopenharmony_ci   ops->setup(cmd, cs, src_format, dst_format, info->dstSubresource.aspectMask,
1580bf215546Sopenharmony_ci              blit_param, false, dst_image->layout[0].ubwc,
1581bf215546Sopenharmony_ci              dst_image->layout[0].nr_samples);
1582bf215546Sopenharmony_ci
1583bf215546Sopenharmony_ci   if (ops == &r3d_ops) {
1584bf215546Sopenharmony_ci      r3d_coords_raw(cs, (float[]) {
1585bf215546Sopenharmony_ci         info->dstOffsets[0].x, info->dstOffsets[0].y,
1586bf215546Sopenharmony_ci         info->srcOffsets[0].x, info->srcOffsets[0].y,
1587bf215546Sopenharmony_ci         info->dstOffsets[1].x, info->dstOffsets[1].y,
1588bf215546Sopenharmony_ci         info->srcOffsets[1].x, info->srcOffsets[1].y
1589bf215546Sopenharmony_ci      });
1590bf215546Sopenharmony_ci   } else {
1591bf215546Sopenharmony_ci      tu_cs_emit_regs(cs,
1592bf215546Sopenharmony_ci         A6XX_GRAS_2D_DST_TL(.x = MIN2(info->dstOffsets[0].x, info->dstOffsets[1].x),
1593bf215546Sopenharmony_ci                             .y = MIN2(info->dstOffsets[0].y, info->dstOffsets[1].y)),
1594bf215546Sopenharmony_ci         A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1,
1595bf215546Sopenharmony_ci                             .y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1));
1596bf215546Sopenharmony_ci      tu_cs_emit_regs(cs,
1597bf215546Sopenharmony_ci         A6XX_GRAS_2D_SRC_TL_X(MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)),
1598bf215546Sopenharmony_ci         A6XX_GRAS_2D_SRC_BR_X(MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1),
1599bf215546Sopenharmony_ci         A6XX_GRAS_2D_SRC_TL_Y(MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)),
1600bf215546Sopenharmony_ci         A6XX_GRAS_2D_SRC_BR_Y(MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1));
1601bf215546Sopenharmony_ci   }
1602bf215546Sopenharmony_ci
1603bf215546Sopenharmony_ci   struct fdl6_view dst, src;
1604bf215546Sopenharmony_ci   tu_image_view_blit(&dst, dst_image, &info->dstSubresource,
1605bf215546Sopenharmony_ci                      MIN2(info->dstOffsets[0].z, info->dstOffsets[1].z));
1606bf215546Sopenharmony_ci
1607bf215546Sopenharmony_ci   if (z_scale) {
1608bf215546Sopenharmony_ci      tu_image_view_copy_blit(&src, src_image, src_format,
1609bf215546Sopenharmony_ci                              &info->srcSubresource, 0, true);
1610bf215546Sopenharmony_ci      ops->src(cmd, cs, &src, 0, filter, dst_format);
1611bf215546Sopenharmony_ci   } else {
1612bf215546Sopenharmony_ci      tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z);
1613bf215546Sopenharmony_ci   }
1614bf215546Sopenharmony_ci
1615bf215546Sopenharmony_ci   for (uint32_t i = 0; i < layers; i++) {
1616bf215546Sopenharmony_ci      if (z_scale) {
1617bf215546Sopenharmony_ci         float t = ((float) i + 0.5f) / (float) layers;
1618bf215546Sopenharmony_ci         r3d_coord_z(cs, t * (src1_z - src0_z) + src0_z);
1619bf215546Sopenharmony_ci      } else {
1620bf215546Sopenharmony_ci         ops->src(cmd, cs, &src, i, filter, dst_format);
1621bf215546Sopenharmony_ci      }
1622bf215546Sopenharmony_ci      ops->dst(cs, &dst, i, src_format);
1623bf215546Sopenharmony_ci      ops->run(cmd, cs);
1624bf215546Sopenharmony_ci   }
1625bf215546Sopenharmony_ci
1626bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
1627bf215546Sopenharmony_ci
1628bf215546Sopenharmony_ci   trace_end_blit(&cmd->trace, cs,
1629bf215546Sopenharmony_ci                  ops == &r3d_ops,
1630bf215546Sopenharmony_ci                  src_image->vk.format,
1631bf215546Sopenharmony_ci                  dst_image->vk.format,
1632bf215546Sopenharmony_ci                  layers);
1633bf215546Sopenharmony_ci}
1634bf215546Sopenharmony_ci
1635bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
1636bf215546Sopenharmony_citu_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
1637bf215546Sopenharmony_ci                    const VkBlitImageInfo2* pBlitImageInfo)
1638bf215546Sopenharmony_ci
1639bf215546Sopenharmony_ci{
1640bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1641bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, src_image, pBlitImageInfo->srcImage);
1642bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, dst_image, pBlitImageInfo->dstImage);
1643bf215546Sopenharmony_ci
1644bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pBlitImageInfo->regionCount; ++i) {
1645bf215546Sopenharmony_ci      /* can't blit both depth and stencil at once with D32_S8
1646bf215546Sopenharmony_ci       * TODO: more advanced 3D blit path to support it instead?
1647bf215546Sopenharmony_ci       */
1648bf215546Sopenharmony_ci      if (src_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
1649bf215546Sopenharmony_ci          dst_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
1650bf215546Sopenharmony_ci         VkImageBlit2 region = pBlitImageInfo->pRegions[i];
1651bf215546Sopenharmony_ci         u_foreach_bit(b, region.dstSubresource.aspectMask) {
1652bf215546Sopenharmony_ci            region.srcSubresource.aspectMask = BIT(b);
1653bf215546Sopenharmony_ci            region.dstSubresource.aspectMask = BIT(b);
1654bf215546Sopenharmony_ci            tu6_blit_image(cmd, src_image, dst_image, &region, pBlitImageInfo->filter);
1655bf215546Sopenharmony_ci         }
1656bf215546Sopenharmony_ci         continue;
1657bf215546Sopenharmony_ci      }
1658bf215546Sopenharmony_ci      tu6_blit_image(cmd, src_image, dst_image, pBlitImageInfo->pRegions + i,
1659bf215546Sopenharmony_ci                     pBlitImageInfo->filter);
1660bf215546Sopenharmony_ci   }
1661bf215546Sopenharmony_ci
1662bf215546Sopenharmony_ci   if (dst_image->lrz_height) {
1663bf215546Sopenharmony_ci      tu_disable_lrz(cmd, &cmd->cs, dst_image);
1664bf215546Sopenharmony_ci   }
1665bf215546Sopenharmony_ci}
1666bf215546Sopenharmony_ci
1667bf215546Sopenharmony_cistatic void
1668bf215546Sopenharmony_cicopy_compressed(VkFormat format,
1669bf215546Sopenharmony_ci                VkOffset3D *offset,
1670bf215546Sopenharmony_ci                VkExtent3D *extent,
1671bf215546Sopenharmony_ci                uint32_t *width,
1672bf215546Sopenharmony_ci                uint32_t *height)
1673bf215546Sopenharmony_ci{
1674bf215546Sopenharmony_ci   if (!vk_format_is_compressed(format))
1675bf215546Sopenharmony_ci      return;
1676bf215546Sopenharmony_ci
1677bf215546Sopenharmony_ci   uint32_t block_width = vk_format_get_blockwidth(format);
1678bf215546Sopenharmony_ci   uint32_t block_height = vk_format_get_blockheight(format);
1679bf215546Sopenharmony_ci
1680bf215546Sopenharmony_ci   offset->x /= block_width;
1681bf215546Sopenharmony_ci   offset->y /= block_height;
1682bf215546Sopenharmony_ci
1683bf215546Sopenharmony_ci   if (extent) {
1684bf215546Sopenharmony_ci      extent->width = DIV_ROUND_UP(extent->width, block_width);
1685bf215546Sopenharmony_ci      extent->height = DIV_ROUND_UP(extent->height, block_height);
1686bf215546Sopenharmony_ci   }
1687bf215546Sopenharmony_ci   if (width)
1688bf215546Sopenharmony_ci      *width = DIV_ROUND_UP(*width, block_width);
1689bf215546Sopenharmony_ci   if (height)
1690bf215546Sopenharmony_ci      *height = DIV_ROUND_UP(*height, block_height);
1691bf215546Sopenharmony_ci}
1692bf215546Sopenharmony_ci
1693bf215546Sopenharmony_cistatic void
1694bf215546Sopenharmony_citu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
1695bf215546Sopenharmony_ci                        struct tu_buffer *src_buffer,
1696bf215546Sopenharmony_ci                        struct tu_image *dst_image,
1697bf215546Sopenharmony_ci                        const VkBufferImageCopy2 *info)
1698bf215546Sopenharmony_ci{
1699bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->cs;
1700bf215546Sopenharmony_ci   uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
1701bf215546Sopenharmony_ci   enum pipe_format src_format =
1702bf215546Sopenharmony_ci      copy_format(dst_image->vk.format, info->imageSubresource.aspectMask);
1703bf215546Sopenharmony_ci   enum pipe_format dst_format =
1704bf215546Sopenharmony_ci      copy_format(dst_image->vk.format, info->imageSubresource.aspectMask);
1705bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
1706bf215546Sopenharmony_ci
1707bf215546Sopenharmony_ci   /* special case for buffer to stencil */
1708bf215546Sopenharmony_ci   if (dst_image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
1709bf215546Sopenharmony_ci       info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1710bf215546Sopenharmony_ci      src_format = PIPE_FORMAT_S8_UINT;
1711bf215546Sopenharmony_ci   }
1712bf215546Sopenharmony_ci
1713bf215546Sopenharmony_ci   /* note: could use "R8_UNORM" when no UBWC */
1714bf215546Sopenharmony_ci   if (src_format == PIPE_FORMAT_Y8_UNORM)
1715bf215546Sopenharmony_ci      ops = &r3d_ops;
1716bf215546Sopenharmony_ci
1717bf215546Sopenharmony_ci   VkOffset3D offset = info->imageOffset;
1718bf215546Sopenharmony_ci   VkExtent3D extent = info->imageExtent;
1719bf215546Sopenharmony_ci   uint32_t src_width = info->bufferRowLength ?: extent.width;
1720bf215546Sopenharmony_ci   uint32_t src_height = info->bufferImageHeight ?: extent.height;
1721bf215546Sopenharmony_ci
1722bf215546Sopenharmony_ci   copy_compressed(dst_image->vk.format, &offset, &extent, &src_width, &src_height);
1723bf215546Sopenharmony_ci
1724bf215546Sopenharmony_ci   uint32_t pitch = src_width * util_format_get_blocksize(src_format);
1725bf215546Sopenharmony_ci   uint32_t layer_size = src_height * pitch;
1726bf215546Sopenharmony_ci
1727bf215546Sopenharmony_ci   ops->setup(cmd, cs, src_format, dst_format,
1728bf215546Sopenharmony_ci              info->imageSubresource.aspectMask, 0, false, dst_image->layout[0].ubwc,
1729bf215546Sopenharmony_ci              dst_image->layout[0].nr_samples);
1730bf215546Sopenharmony_ci
1731bf215546Sopenharmony_ci   struct fdl6_view dst;
1732bf215546Sopenharmony_ci   tu_image_view_copy(&dst, dst_image, dst_format, &info->imageSubresource, offset.z);
1733bf215546Sopenharmony_ci
1734bf215546Sopenharmony_ci   for (uint32_t i = 0; i < layers; i++) {
1735bf215546Sopenharmony_ci      ops->dst(cs, &dst, i, src_format);
1736bf215546Sopenharmony_ci
1737bf215546Sopenharmony_ci      uint64_t src_va = src_buffer->iova + info->bufferOffset + layer_size * i;
1738bf215546Sopenharmony_ci      if ((src_va & 63) || (pitch & 63)) {
1739bf215546Sopenharmony_ci         for (uint32_t y = 0; y < extent.height; y++) {
1740bf215546Sopenharmony_ci            uint32_t x = (src_va & 63) / util_format_get_blocksize(src_format);
1741bf215546Sopenharmony_ci            ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch,
1742bf215546Sopenharmony_ci                            x + extent.width, 1, dst_format);
1743bf215546Sopenharmony_ci            ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y},  &(VkOffset2D){x},
1744bf215546Sopenharmony_ci                        &(VkExtent2D) {extent.width, 1});
1745bf215546Sopenharmony_ci            ops->run(cmd, cs);
1746bf215546Sopenharmony_ci            src_va += pitch;
1747bf215546Sopenharmony_ci         }
1748bf215546Sopenharmony_ci      } else {
1749bf215546Sopenharmony_ci         ops->src_buffer(cmd, cs, src_format, src_va, pitch, extent.width, extent.height, dst_format);
1750bf215546Sopenharmony_ci         coords(ops, cs, &offset, &(VkOffset3D){}, &extent);
1751bf215546Sopenharmony_ci         ops->run(cmd, cs);
1752bf215546Sopenharmony_ci      }
1753bf215546Sopenharmony_ci   }
1754bf215546Sopenharmony_ci
1755bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
1756bf215546Sopenharmony_ci}
1757bf215546Sopenharmony_ci
1758bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
1759bf215546Sopenharmony_citu_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
1760bf215546Sopenharmony_ci                            const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
1761bf215546Sopenharmony_ci{
1762bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1763bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, dst_image, pCopyBufferToImageInfo->dstImage);
1764bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
1765bf215546Sopenharmony_ci
1766bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; ++i)
1767bf215546Sopenharmony_ci      tu_copy_buffer_to_image(cmd, src_buffer, dst_image,
1768bf215546Sopenharmony_ci                              pCopyBufferToImageInfo->pRegions + i);
1769bf215546Sopenharmony_ci
1770bf215546Sopenharmony_ci   if (dst_image->lrz_height) {
1771bf215546Sopenharmony_ci      tu_disable_lrz(cmd, &cmd->cs, dst_image);
1772bf215546Sopenharmony_ci   }
1773bf215546Sopenharmony_ci}
1774bf215546Sopenharmony_ci
1775bf215546Sopenharmony_cistatic void
1776bf215546Sopenharmony_citu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
1777bf215546Sopenharmony_ci                        struct tu_image *src_image,
1778bf215546Sopenharmony_ci                        struct tu_buffer *dst_buffer,
1779bf215546Sopenharmony_ci                        const VkBufferImageCopy2 *info)
1780bf215546Sopenharmony_ci{
1781bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->cs;
1782bf215546Sopenharmony_ci   uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
1783bf215546Sopenharmony_ci   enum pipe_format dst_format =
1784bf215546Sopenharmony_ci      copy_format(src_image->vk.format, info->imageSubresource.aspectMask);
1785bf215546Sopenharmony_ci   enum pipe_format src_format =
1786bf215546Sopenharmony_ci      copy_format(src_image->vk.format, info->imageSubresource.aspectMask);
1787bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
1788bf215546Sopenharmony_ci
1789bf215546Sopenharmony_ci   if (src_image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
1790bf215546Sopenharmony_ci       info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1791bf215546Sopenharmony_ci      dst_format = PIPE_FORMAT_S8_UINT;
1792bf215546Sopenharmony_ci   }
1793bf215546Sopenharmony_ci
1794bf215546Sopenharmony_ci   /* note: could use "R8_UNORM" when no UBWC */
1795bf215546Sopenharmony_ci   if (dst_format == PIPE_FORMAT_Y8_UNORM)
1796bf215546Sopenharmony_ci      ops = &r3d_ops;
1797bf215546Sopenharmony_ci
1798bf215546Sopenharmony_ci   VkOffset3D offset = info->imageOffset;
1799bf215546Sopenharmony_ci   VkExtent3D extent = info->imageExtent;
1800bf215546Sopenharmony_ci   uint32_t dst_width = info->bufferRowLength ?: extent.width;
1801bf215546Sopenharmony_ci   uint32_t dst_height = info->bufferImageHeight ?: extent.height;
1802bf215546Sopenharmony_ci
1803bf215546Sopenharmony_ci   copy_compressed(src_image->vk.format, &offset, &extent, &dst_width, &dst_height);
1804bf215546Sopenharmony_ci
1805bf215546Sopenharmony_ci   uint32_t pitch = dst_width * util_format_get_blocksize(dst_format);
1806bf215546Sopenharmony_ci   uint32_t layer_size = pitch * dst_height;
1807bf215546Sopenharmony_ci
1808bf215546Sopenharmony_ci   ops->setup(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false,
1809bf215546Sopenharmony_ci              VK_SAMPLE_COUNT_1_BIT);
1810bf215546Sopenharmony_ci
1811bf215546Sopenharmony_ci   struct fdl6_view src;
1812bf215546Sopenharmony_ci   tu_image_view_copy(&src, src_image, src_format, &info->imageSubresource, offset.z);
1813bf215546Sopenharmony_ci
1814bf215546Sopenharmony_ci   for (uint32_t i = 0; i < layers; i++) {
1815bf215546Sopenharmony_ci      ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, dst_format);
1816bf215546Sopenharmony_ci
1817bf215546Sopenharmony_ci      uint64_t dst_va = dst_buffer->iova + info->bufferOffset + layer_size * i;
1818bf215546Sopenharmony_ci      if ((dst_va & 63) || (pitch & 63)) {
1819bf215546Sopenharmony_ci         for (uint32_t y = 0; y < extent.height; y++) {
1820bf215546Sopenharmony_ci            uint32_t x = (dst_va & 63) / util_format_get_blocksize(dst_format);
1821bf215546Sopenharmony_ci            ops->dst_buffer(cs, dst_format, dst_va & ~63, 0, src_format);
1822bf215546Sopenharmony_ci            ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y},
1823bf215546Sopenharmony_ci                        &(VkExtent2D) {extent.width, 1});
1824bf215546Sopenharmony_ci            ops->run(cmd, cs);
1825bf215546Sopenharmony_ci            dst_va += pitch;
1826bf215546Sopenharmony_ci         }
1827bf215546Sopenharmony_ci      } else {
1828bf215546Sopenharmony_ci         ops->dst_buffer(cs, dst_format, dst_va, pitch, src_format);
1829bf215546Sopenharmony_ci         coords(ops, cs, &(VkOffset3D) {0, 0}, &offset, &extent);
1830bf215546Sopenharmony_ci         ops->run(cmd, cs);
1831bf215546Sopenharmony_ci      }
1832bf215546Sopenharmony_ci   }
1833bf215546Sopenharmony_ci
1834bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
1835bf215546Sopenharmony_ci}
1836bf215546Sopenharmony_ci
1837bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
1838bf215546Sopenharmony_citu_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
1839bf215546Sopenharmony_ci                            const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo)
1840bf215546Sopenharmony_ci{
1841bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1842bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, src_image, pCopyImageToBufferInfo->srcImage);
1843bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);
1844bf215546Sopenharmony_ci
1845bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; ++i)
1846bf215546Sopenharmony_ci      tu_copy_image_to_buffer(cmd, src_image, dst_buffer,
1847bf215546Sopenharmony_ci                              pCopyImageToBufferInfo->pRegions + i);
1848bf215546Sopenharmony_ci}
1849bf215546Sopenharmony_ci
1850bf215546Sopenharmony_ci/* Tiled formats don't support swapping, which means that we can't support
1851bf215546Sopenharmony_ci * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some
1852bf215546Sopenharmony_ci * formats like B5G5R5A1 have a separate linear-only format when sampling.
1853bf215546Sopenharmony_ci * Currently we fake support for tiled swapped formats and use the unswapped
1854bf215546Sopenharmony_ci * format instead, but this means that reinterpreting copies to and from
1855bf215546Sopenharmony_ci * swapped formats can't be performed correctly unless we can swizzle the
1856bf215546Sopenharmony_ci * components by reinterpreting the other image as the "correct" swapped
1857bf215546Sopenharmony_ci * format, i.e. only when the other image is linear.
1858bf215546Sopenharmony_ci */
1859bf215546Sopenharmony_ci
1860bf215546Sopenharmony_cistatic bool
1861bf215546Sopenharmony_ciis_swapped_format(enum pipe_format format)
1862bf215546Sopenharmony_ci{
1863bf215546Sopenharmony_ci   struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR);
1864bf215546Sopenharmony_ci   struct tu_native_format tiled = tu6_format_texture(format, TILE6_3);
1865bf215546Sopenharmony_ci   return linear.fmt != tiled.fmt || linear.swap != tiled.swap;
1866bf215546Sopenharmony_ci}
1867bf215546Sopenharmony_ci
1868bf215546Sopenharmony_ci/* R8G8_* formats have a different tiling layout than other cpp=2 formats, and
1869bf215546Sopenharmony_ci * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice
1870bf215546Sopenharmony_ci * versa). This should mirror the logic in fdl6_layout.
1871bf215546Sopenharmony_ci */
1872bf215546Sopenharmony_cistatic bool
1873bf215546Sopenharmony_ciimage_is_r8g8(struct tu_image *image)
1874bf215546Sopenharmony_ci{
1875bf215546Sopenharmony_ci   return image->layout[0].cpp == 2 &&
1876bf215546Sopenharmony_ci      vk_format_get_nr_components(image->vk.format) == 2;
1877bf215546Sopenharmony_ci}
1878bf215546Sopenharmony_ci
1879bf215546Sopenharmony_cistatic void
1880bf215546Sopenharmony_citu_copy_image_to_image(struct tu_cmd_buffer *cmd,
1881bf215546Sopenharmony_ci                       struct tu_image *src_image,
1882bf215546Sopenharmony_ci                       struct tu_image *dst_image,
1883bf215546Sopenharmony_ci                       const VkImageCopy2 *info)
1884bf215546Sopenharmony_ci{
1885bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
1886bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->cs;
1887bf215546Sopenharmony_ci
1888bf215546Sopenharmony_ci   if (dst_image->layout[0].nr_samples > 1)
1889bf215546Sopenharmony_ci      ops = &r3d_ops;
1890bf215546Sopenharmony_ci
1891bf215546Sopenharmony_ci   enum pipe_format format = PIPE_FORMAT_NONE;
1892bf215546Sopenharmony_ci   VkOffset3D src_offset = info->srcOffset;
1893bf215546Sopenharmony_ci   VkOffset3D dst_offset = info->dstOffset;
1894bf215546Sopenharmony_ci   VkExtent3D extent = info->extent;
1895bf215546Sopenharmony_ci   uint32_t layers_to_copy = MAX2(info->extent.depth, info->srcSubresource.layerCount);
1896bf215546Sopenharmony_ci
1897bf215546Sopenharmony_ci   /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between
1898bf215546Sopenharmony_ci    * Images":
1899bf215546Sopenharmony_ci    *
1900bf215546Sopenharmony_ci    *    When copying between compressed and uncompressed formats the extent
1901bf215546Sopenharmony_ci    *    members represent the texel dimensions of the source image and not
1902bf215546Sopenharmony_ci    *    the destination. When copying from a compressed image to an
1903bf215546Sopenharmony_ci    *    uncompressed image the image texel dimensions written to the
1904bf215546Sopenharmony_ci    *    uncompressed image will be source extent divided by the compressed
1905bf215546Sopenharmony_ci    *    texel block dimensions. When copying from an uncompressed image to a
1906bf215546Sopenharmony_ci    *    compressed image the image texel dimensions written to the compressed
1907bf215546Sopenharmony_ci    *    image will be the source extent multiplied by the compressed texel
1908bf215546Sopenharmony_ci    *    block dimensions.
1909bf215546Sopenharmony_ci    *
1910bf215546Sopenharmony_ci    * This means we only have to adjust the extent if the source image is
1911bf215546Sopenharmony_ci    * compressed.
1912bf215546Sopenharmony_ci    */
1913bf215546Sopenharmony_ci   copy_compressed(src_image->vk.format, &src_offset, &extent, NULL, NULL);
1914bf215546Sopenharmony_ci   copy_compressed(dst_image->vk.format, &dst_offset, NULL, NULL, NULL);
1915bf215546Sopenharmony_ci
1916bf215546Sopenharmony_ci   enum pipe_format dst_format = copy_format(dst_image->vk.format, info->dstSubresource.aspectMask);
1917bf215546Sopenharmony_ci   enum pipe_format src_format = copy_format(src_image->vk.format, info->srcSubresource.aspectMask);
1918bf215546Sopenharmony_ci
1919bf215546Sopenharmony_ci   /* note: could use "R8_UNORM" when no UBWC */
1920bf215546Sopenharmony_ci   if (dst_format == PIPE_FORMAT_Y8_UNORM ||
1921bf215546Sopenharmony_ci       src_format == PIPE_FORMAT_Y8_UNORM)
1922bf215546Sopenharmony_ci      ops = &r3d_ops;
1923bf215546Sopenharmony_ci
1924bf215546Sopenharmony_ci   bool use_staging_blit = false;
1925bf215546Sopenharmony_ci
1926bf215546Sopenharmony_ci   if (src_format == dst_format) {
1927bf215546Sopenharmony_ci      /* Images that share a format can always be copied directly because it's
1928bf215546Sopenharmony_ci       * the same as a blit.
1929bf215546Sopenharmony_ci       */
1930bf215546Sopenharmony_ci      format = src_format;
1931bf215546Sopenharmony_ci   } else if (!src_image->layout[0].tile_mode) {
1932bf215546Sopenharmony_ci      /* If an image is linear, we can always safely reinterpret it with the
1933bf215546Sopenharmony_ci       * other image's format and then do a regular blit.
1934bf215546Sopenharmony_ci       */
1935bf215546Sopenharmony_ci      format = dst_format;
1936bf215546Sopenharmony_ci   } else if (!dst_image->layout[0].tile_mode) {
1937bf215546Sopenharmony_ci      format = src_format;
1938bf215546Sopenharmony_ci   } else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) {
1939bf215546Sopenharmony_ci      /* We can't currently copy r8g8 images to/from other cpp=2 images,
1940bf215546Sopenharmony_ci       * due to the different tile layout.
1941bf215546Sopenharmony_ci       */
1942bf215546Sopenharmony_ci      use_staging_blit = true;
1943bf215546Sopenharmony_ci   } else if (is_swapped_format(src_format) ||
1944bf215546Sopenharmony_ci              is_swapped_format(dst_format)) {
1945bf215546Sopenharmony_ci      /* If either format has a non-identity swap, then we can't copy
1946bf215546Sopenharmony_ci       * to/from it.
1947bf215546Sopenharmony_ci       */
1948bf215546Sopenharmony_ci      use_staging_blit = true;
1949bf215546Sopenharmony_ci   } else if (!src_image->layout[0].ubwc) {
1950bf215546Sopenharmony_ci      format = dst_format;
1951bf215546Sopenharmony_ci   } else if (!dst_image->layout[0].ubwc) {
1952bf215546Sopenharmony_ci      format = src_format;
1953bf215546Sopenharmony_ci   } else {
1954bf215546Sopenharmony_ci      /* Both formats use UBWC and so neither can be reinterpreted.
1955bf215546Sopenharmony_ci       * TODO: We could do an in-place decompression of the dst instead.
1956bf215546Sopenharmony_ci       */
1957bf215546Sopenharmony_ci      perf_debug(cmd->device, "TODO: Do in-place UBWC decompression for UBWC->UBWC blits");
1958bf215546Sopenharmony_ci      use_staging_blit = true;
1959bf215546Sopenharmony_ci   }
1960bf215546Sopenharmony_ci
1961bf215546Sopenharmony_ci   struct fdl6_view dst, src;
1962bf215546Sopenharmony_ci
1963bf215546Sopenharmony_ci   if (use_staging_blit) {
1964bf215546Sopenharmony_ci      tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z);
1965bf215546Sopenharmony_ci      tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z);
1966bf215546Sopenharmony_ci
1967bf215546Sopenharmony_ci      struct fdl_layout staging_layout = { 0 };
1968bf215546Sopenharmony_ci      VkOffset3D staging_offset = { 0 };
1969bf215546Sopenharmony_ci
1970bf215546Sopenharmony_ci      staging_layout.tile_mode = TILE6_LINEAR;
1971bf215546Sopenharmony_ci      staging_layout.ubwc = false;
1972bf215546Sopenharmony_ci
1973bf215546Sopenharmony_ci      fdl6_layout(&staging_layout,
1974bf215546Sopenharmony_ci                  src_format,
1975bf215546Sopenharmony_ci                  src_image->layout[0].nr_samples,
1976bf215546Sopenharmony_ci                  extent.width,
1977bf215546Sopenharmony_ci                  extent.height,
1978bf215546Sopenharmony_ci                  extent.depth,
1979bf215546Sopenharmony_ci                  1,
1980bf215546Sopenharmony_ci                  info->srcSubresource.layerCount,
1981bf215546Sopenharmony_ci                  extent.depth > 1,
1982bf215546Sopenharmony_ci                  NULL);
1983bf215546Sopenharmony_ci
1984bf215546Sopenharmony_ci      struct tu_bo *staging_bo;
1985bf215546Sopenharmony_ci      VkResult result = tu_get_scratch_bo(cmd->device,
1986bf215546Sopenharmony_ci                                          staging_layout.size,
1987bf215546Sopenharmony_ci                                          &staging_bo);
1988bf215546Sopenharmony_ci      if (result != VK_SUCCESS) {
1989bf215546Sopenharmony_ci         cmd->record_result = result;
1990bf215546Sopenharmony_ci         return;
1991bf215546Sopenharmony_ci      }
1992bf215546Sopenharmony_ci
1993bf215546Sopenharmony_ci      struct fdl6_view staging;
1994bf215546Sopenharmony_ci      const struct fdl_layout *staging_layout_ptr = &staging_layout;
1995bf215546Sopenharmony_ci      fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) {
1996bf215546Sopenharmony_ci         .iova = staging_bo->iova,
1997bf215546Sopenharmony_ci         .base_array_layer = 0,
1998bf215546Sopenharmony_ci         .layer_count = 1,
1999bf215546Sopenharmony_ci         .base_miplevel = 0,
2000bf215546Sopenharmony_ci         .level_count = info->srcSubresource.layerCount,
2001bf215546Sopenharmony_ci         .format = tu_format_for_aspect(src_format, VK_IMAGE_ASPECT_COLOR_BIT),
2002bf215546Sopenharmony_ci         .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
2003bf215546Sopenharmony_ci         .type = FDL_VIEW_TYPE_2D,
2004bf215546Sopenharmony_ci      }, false);
2005bf215546Sopenharmony_ci
2006bf215546Sopenharmony_ci      ops->setup(cmd, cs, src_format, src_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false,
2007bf215546Sopenharmony_ci                 dst_image->layout[0].nr_samples);
2008bf215546Sopenharmony_ci      coords(ops, cs, &staging_offset, &src_offset, &extent);
2009bf215546Sopenharmony_ci
2010bf215546Sopenharmony_ci      for (uint32_t i = 0; i < layers_to_copy; i++) {
2011bf215546Sopenharmony_ci         ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, src_format);
2012bf215546Sopenharmony_ci         ops->dst(cs, &staging, i, src_format);
2013bf215546Sopenharmony_ci         ops->run(cmd, cs);
2014bf215546Sopenharmony_ci      }
2015bf215546Sopenharmony_ci
2016bf215546Sopenharmony_ci      /* When executed by the user there has to be a pipeline barrier here,
2017bf215546Sopenharmony_ci       * but since we're doing it manually we'll have to flush ourselves.
2018bf215546Sopenharmony_ci       */
2019bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
2020bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
2021bf215546Sopenharmony_ci      tu_cs_emit_wfi(cs);
2022bf215546Sopenharmony_ci
2023bf215546Sopenharmony_ci      fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) {
2024bf215546Sopenharmony_ci         .iova = staging_bo->iova,
2025bf215546Sopenharmony_ci         .base_array_layer = 0,
2026bf215546Sopenharmony_ci         .layer_count = 1,
2027bf215546Sopenharmony_ci         .base_miplevel = 0,
2028bf215546Sopenharmony_ci         .level_count = info->srcSubresource.layerCount,
2029bf215546Sopenharmony_ci         .format = tu_format_for_aspect(dst_format, VK_IMAGE_ASPECT_COLOR_BIT),
2030bf215546Sopenharmony_ci         .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
2031bf215546Sopenharmony_ci         .type = FDL_VIEW_TYPE_2D,
2032bf215546Sopenharmony_ci      }, false);
2033bf215546Sopenharmony_ci
2034bf215546Sopenharmony_ci      ops->setup(cmd, cs, dst_format, dst_format, info->dstSubresource.aspectMask,
2035bf215546Sopenharmony_ci                 0, false, dst_image->layout[0].ubwc,
2036bf215546Sopenharmony_ci                 dst_image->layout[0].nr_samples);
2037bf215546Sopenharmony_ci      coords(ops, cs, &dst_offset, &staging_offset, &extent);
2038bf215546Sopenharmony_ci
2039bf215546Sopenharmony_ci      for (uint32_t i = 0; i < layers_to_copy; i++) {
2040bf215546Sopenharmony_ci         ops->src(cmd, cs, &staging, i, VK_FILTER_NEAREST, dst_format);
2041bf215546Sopenharmony_ci         ops->dst(cs, &dst, i, dst_format);
2042bf215546Sopenharmony_ci         ops->run(cmd, cs);
2043bf215546Sopenharmony_ci      }
2044bf215546Sopenharmony_ci   } else {
2045bf215546Sopenharmony_ci      tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z);
2046bf215546Sopenharmony_ci      tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z);
2047bf215546Sopenharmony_ci
2048bf215546Sopenharmony_ci      ops->setup(cmd, cs, format, format, info->dstSubresource.aspectMask,
2049bf215546Sopenharmony_ci                 0, false, dst_image->layout[0].ubwc,
2050bf215546Sopenharmony_ci                 dst_image->layout[0].nr_samples);
2051bf215546Sopenharmony_ci      coords(ops, cs, &dst_offset, &src_offset, &extent);
2052bf215546Sopenharmony_ci
2053bf215546Sopenharmony_ci      for (uint32_t i = 0; i < layers_to_copy; i++) {
2054bf215546Sopenharmony_ci         ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, format);
2055bf215546Sopenharmony_ci         ops->dst(cs, &dst, i, format);
2056bf215546Sopenharmony_ci         ops->run(cmd, cs);
2057bf215546Sopenharmony_ci      }
2058bf215546Sopenharmony_ci   }
2059bf215546Sopenharmony_ci
2060bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
2061bf215546Sopenharmony_ci}
2062bf215546Sopenharmony_ci
2063bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
2064bf215546Sopenharmony_citu_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
2065bf215546Sopenharmony_ci                    const VkCopyImageInfo2* pCopyImageInfo)
2066bf215546Sopenharmony_ci{
2067bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2068bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, src_image, pCopyImageInfo->srcImage);
2069bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, dst_image, pCopyImageInfo->dstImage);
2070bf215546Sopenharmony_ci
2071bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pCopyImageInfo->regionCount; ++i) {
2072bf215546Sopenharmony_ci      if (src_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
2073bf215546Sopenharmony_ci         VkImageCopy2 info = pCopyImageInfo->pRegions[i];
2074bf215546Sopenharmony_ci         u_foreach_bit(b, info.dstSubresource.aspectMask) {
2075bf215546Sopenharmony_ci            info.srcSubresource.aspectMask = BIT(b);
2076bf215546Sopenharmony_ci            info.dstSubresource.aspectMask = BIT(b);
2077bf215546Sopenharmony_ci            tu_copy_image_to_image(cmd, src_image, dst_image, &info);
2078bf215546Sopenharmony_ci         }
2079bf215546Sopenharmony_ci         continue;
2080bf215546Sopenharmony_ci      }
2081bf215546Sopenharmony_ci
2082bf215546Sopenharmony_ci      tu_copy_image_to_image(cmd, src_image, dst_image,
2083bf215546Sopenharmony_ci                             pCopyImageInfo->pRegions + i);
2084bf215546Sopenharmony_ci   }
2085bf215546Sopenharmony_ci
2086bf215546Sopenharmony_ci   if (dst_image->lrz_height) {
2087bf215546Sopenharmony_ci      tu_disable_lrz(cmd, &cmd->cs, dst_image);
2088bf215546Sopenharmony_ci   }
2089bf215546Sopenharmony_ci}
2090bf215546Sopenharmony_ci
2091bf215546Sopenharmony_cistatic void
2092bf215546Sopenharmony_cicopy_buffer(struct tu_cmd_buffer *cmd,
2093bf215546Sopenharmony_ci            uint64_t dst_va,
2094bf215546Sopenharmony_ci            uint64_t src_va,
2095bf215546Sopenharmony_ci            uint64_t size,
2096bf215546Sopenharmony_ci            uint32_t block_size)
2097bf215546Sopenharmony_ci{
2098bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
2099bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->cs;
2100bf215546Sopenharmony_ci   enum pipe_format format = block_size == 4 ? PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R8_UNORM;
2101bf215546Sopenharmony_ci   uint64_t blocks = size / block_size;
2102bf215546Sopenharmony_ci
2103bf215546Sopenharmony_ci   ops->setup(cmd, cs, format, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false,
2104bf215546Sopenharmony_ci              VK_SAMPLE_COUNT_1_BIT);
2105bf215546Sopenharmony_ci
2106bf215546Sopenharmony_ci   while (blocks) {
2107bf215546Sopenharmony_ci      uint32_t src_x = (src_va & 63) / block_size;
2108bf215546Sopenharmony_ci      uint32_t dst_x = (dst_va & 63) / block_size;
2109bf215546Sopenharmony_ci      uint32_t width = MIN2(MIN2(blocks, 0x4000 - src_x), 0x4000 - dst_x);
2110bf215546Sopenharmony_ci
2111bf215546Sopenharmony_ci      ops->src_buffer(cmd, cs, format, src_va & ~63, 0, src_x + width, 1, format);
2112bf215546Sopenharmony_ci      ops->dst_buffer(     cs, format, dst_va & ~63, 0, format);
2113bf215546Sopenharmony_ci      ops->coords(cs, &(VkOffset2D) {dst_x}, &(VkOffset2D) {src_x}, &(VkExtent2D) {width, 1});
2114bf215546Sopenharmony_ci      ops->run(cmd, cs);
2115bf215546Sopenharmony_ci
2116bf215546Sopenharmony_ci      src_va += width * block_size;
2117bf215546Sopenharmony_ci      dst_va += width * block_size;
2118bf215546Sopenharmony_ci      blocks -= width;
2119bf215546Sopenharmony_ci   }
2120bf215546Sopenharmony_ci
2121bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
2122bf215546Sopenharmony_ci}
2123bf215546Sopenharmony_ci
2124bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
2125bf215546Sopenharmony_citu_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
2126bf215546Sopenharmony_ci                     const VkCopyBufferInfo2 *pCopyBufferInfo)
2127bf215546Sopenharmony_ci{
2128bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2129bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
2130bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
2131bf215546Sopenharmony_ci
2132bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) {
2133bf215546Sopenharmony_ci      const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[i];
2134bf215546Sopenharmony_ci      copy_buffer(cmd,
2135bf215546Sopenharmony_ci                  dst_buffer->iova + region->dstOffset,
2136bf215546Sopenharmony_ci                  src_buffer->iova + region->srcOffset,
2137bf215546Sopenharmony_ci                  region->size, 1);
2138bf215546Sopenharmony_ci   }
2139bf215546Sopenharmony_ci}
2140bf215546Sopenharmony_ci
2141bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
2142bf215546Sopenharmony_citu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
2143bf215546Sopenharmony_ci                   VkBuffer dstBuffer,
2144bf215546Sopenharmony_ci                   VkDeviceSize dstOffset,
2145bf215546Sopenharmony_ci                   VkDeviceSize dataSize,
2146bf215546Sopenharmony_ci                   const void *pData)
2147bf215546Sopenharmony_ci{
2148bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2149bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
2150bf215546Sopenharmony_ci
2151bf215546Sopenharmony_ci   struct tu_cs_memory tmp;
2152bf215546Sopenharmony_ci   VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64 / 4, &tmp);
2153bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
2154bf215546Sopenharmony_ci      cmd->record_result = result;
2155bf215546Sopenharmony_ci      return;
2156bf215546Sopenharmony_ci   }
2157bf215546Sopenharmony_ci
2158bf215546Sopenharmony_ci   memcpy(tmp.map, pData, dataSize);
2159bf215546Sopenharmony_ci   copy_buffer(cmd, buffer->iova + dstOffset, tmp.iova, dataSize, 4);
2160bf215546Sopenharmony_ci}
2161bf215546Sopenharmony_ci
2162bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
2163bf215546Sopenharmony_citu_CmdFillBuffer(VkCommandBuffer commandBuffer,
2164bf215546Sopenharmony_ci                 VkBuffer dstBuffer,
2165bf215546Sopenharmony_ci                 VkDeviceSize dstOffset,
2166bf215546Sopenharmony_ci                 VkDeviceSize fillSize,
2167bf215546Sopenharmony_ci                 uint32_t data)
2168bf215546Sopenharmony_ci{
2169bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2170bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
2171bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
2172bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->cs;
2173bf215546Sopenharmony_ci
2174bf215546Sopenharmony_ci   if (fillSize == VK_WHOLE_SIZE)
2175bf215546Sopenharmony_ci      fillSize = buffer->size - dstOffset;
2176bf215546Sopenharmony_ci
2177bf215546Sopenharmony_ci   uint64_t dst_va = buffer->iova + dstOffset;
2178bf215546Sopenharmony_ci   uint32_t blocks = fillSize / 4;
2179bf215546Sopenharmony_ci
2180bf215546Sopenharmony_ci   ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, PIPE_FORMAT_R32_UINT,
2181bf215546Sopenharmony_ci              VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false,
2182bf215546Sopenharmony_ci              VK_SAMPLE_COUNT_1_BIT);
2183bf215546Sopenharmony_ci   ops->clear_value(cs, PIPE_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});
2184bf215546Sopenharmony_ci
2185bf215546Sopenharmony_ci   while (blocks) {
2186bf215546Sopenharmony_ci      uint32_t dst_x = (dst_va & 63) / 4;
2187bf215546Sopenharmony_ci      uint32_t width = MIN2(blocks, 0x4000 - dst_x);
2188bf215546Sopenharmony_ci
2189bf215546Sopenharmony_ci      ops->dst_buffer(cs, PIPE_FORMAT_R32_UINT, dst_va & ~63, 0, PIPE_FORMAT_R32_UINT);
2190bf215546Sopenharmony_ci      ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1});
2191bf215546Sopenharmony_ci      ops->run(cmd, cs);
2192bf215546Sopenharmony_ci
2193bf215546Sopenharmony_ci      dst_va += width * 4;
2194bf215546Sopenharmony_ci      blocks -= width;
2195bf215546Sopenharmony_ci   }
2196bf215546Sopenharmony_ci
2197bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
2198bf215546Sopenharmony_ci}
2199bf215546Sopenharmony_ci
2200bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
2201bf215546Sopenharmony_citu_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
2202bf215546Sopenharmony_ci                       const VkResolveImageInfo2* pResolveImageInfo)
2203bf215546Sopenharmony_ci{
2204bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2205bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, src_image, pResolveImageInfo->srcImage);
2206bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, dst_image, pResolveImageInfo->dstImage);
2207bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
2208bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->cs;
2209bf215546Sopenharmony_ci
2210bf215546Sopenharmony_ci   enum pipe_format src_format =
2211bf215546Sopenharmony_ci      tu_vk_format_to_pipe_format(src_image->vk.format);
2212bf215546Sopenharmony_ci   enum pipe_format dst_format =
2213bf215546Sopenharmony_ci      tu_vk_format_to_pipe_format(dst_image->vk.format);
2214bf215546Sopenharmony_ci   ops->setup(cmd, cs, src_format, dst_format,
2215bf215546Sopenharmony_ci              VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst_image->layout[0].ubwc,
2216bf215546Sopenharmony_ci              VK_SAMPLE_COUNT_1_BIT);
2217bf215546Sopenharmony_ci
2218bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pResolveImageInfo->regionCount; ++i) {
2219bf215546Sopenharmony_ci      const VkImageResolve2 *info = &pResolveImageInfo->pRegions[i];
2220bf215546Sopenharmony_ci      uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount);
2221bf215546Sopenharmony_ci
2222bf215546Sopenharmony_ci      assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount);
2223bf215546Sopenharmony_ci      /* TODO: aspect masks possible ? */
2224bf215546Sopenharmony_ci
2225bf215546Sopenharmony_ci      coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent);
2226bf215546Sopenharmony_ci
2227bf215546Sopenharmony_ci      struct fdl6_view dst, src;
2228bf215546Sopenharmony_ci      tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffset.z);
2229bf215546Sopenharmony_ci      tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffset.z);
2230bf215546Sopenharmony_ci
2231bf215546Sopenharmony_ci      for (uint32_t i = 0; i < layers; i++) {
2232bf215546Sopenharmony_ci         ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, dst_format);
2233bf215546Sopenharmony_ci         ops->dst(cs, &dst, i, src_format);
2234bf215546Sopenharmony_ci         ops->run(cmd, cs);
2235bf215546Sopenharmony_ci      }
2236bf215546Sopenharmony_ci   }
2237bf215546Sopenharmony_ci
2238bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
2239bf215546Sopenharmony_ci}
2240bf215546Sopenharmony_ci
/* Loop header that visits layer indices in increasing order.
 *
 * - If layer_mask is non-zero, `layer` runs from 0 up to the index of the
 *   highest set bit of the mask, and the body executes only for indices
 *   whose bit is set in the mask.
 * - If layer_mask is zero, the body executes for every index in
 *   [0, layers).
 *
 * `layer` is declared by the macro as a uint32_t. Both layer_mask and
 * layers are evaluated more than once per iteration, so they must be free
 * of side effects. Every argument use is parenthesized so that compound
 * expressions (e.g. `a | b`) keep their intended meaning.
 */
#define for_each_layer(layer, layer_mask, layers) \
   for (uint32_t layer = 0; \
        layer < ((layer_mask) ? (util_logbase2(layer_mask) + 1) : (layers)); \
        layer++) \
      if (!(layer_mask) || ((layer_mask) & BIT(layer)))
2246bf215546Sopenharmony_ci
2247bf215546Sopenharmony_cistatic void
2248bf215546Sopenharmony_ciresolve_sysmem(struct tu_cmd_buffer *cmd,
2249bf215546Sopenharmony_ci               struct tu_cs *cs,
2250bf215546Sopenharmony_ci               VkFormat vk_src_format,
2251bf215546Sopenharmony_ci               VkFormat vk_dst_format,
2252bf215546Sopenharmony_ci               const struct tu_image_view *src,
2253bf215546Sopenharmony_ci               const struct tu_image_view *dst,
2254bf215546Sopenharmony_ci               uint32_t layer_mask,
2255bf215546Sopenharmony_ci               uint32_t layers,
2256bf215546Sopenharmony_ci               const VkRect2D *rect,
2257bf215546Sopenharmony_ci               bool src_separate_ds,
2258bf215546Sopenharmony_ci               bool dst_separate_ds)
2259bf215546Sopenharmony_ci{
2260bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
2261bf215546Sopenharmony_ci
2262bf215546Sopenharmony_ci   trace_start_sysmem_resolve(&cmd->trace, cs);
2263bf215546Sopenharmony_ci
2264bf215546Sopenharmony_ci   enum pipe_format src_format = tu_vk_format_to_pipe_format(vk_src_format);
2265bf215546Sopenharmony_ci   enum pipe_format dst_format = tu_vk_format_to_pipe_format(vk_dst_format);
2266bf215546Sopenharmony_ci
2267bf215546Sopenharmony_ci   ops->setup(cmd, cs, src_format, dst_format,
2268bf215546Sopenharmony_ci              VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst->view.ubwc_enabled,
2269bf215546Sopenharmony_ci              VK_SAMPLE_COUNT_1_BIT);
2270bf215546Sopenharmony_ci   ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);
2271bf215546Sopenharmony_ci
2272bf215546Sopenharmony_ci   for_each_layer(i, layer_mask, layers) {
2273bf215546Sopenharmony_ci      if (src_separate_ds) {
2274bf215546Sopenharmony_ci         if (vk_src_format == VK_FORMAT_D32_SFLOAT) {
2275bf215546Sopenharmony_ci            r2d_src_depth(cmd, cs, src, i, VK_FILTER_NEAREST);
2276bf215546Sopenharmony_ci         } else {
2277bf215546Sopenharmony_ci            r2d_src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST);
2278bf215546Sopenharmony_ci         }
2279bf215546Sopenharmony_ci      } else {
2280bf215546Sopenharmony_ci         ops->src(cmd, cs, &src->view, i, VK_FILTER_NEAREST, dst_format);
2281bf215546Sopenharmony_ci      }
2282bf215546Sopenharmony_ci
2283bf215546Sopenharmony_ci      if (dst_separate_ds) {
2284bf215546Sopenharmony_ci         if (vk_dst_format == VK_FORMAT_D32_SFLOAT) {
2285bf215546Sopenharmony_ci            ops->dst_depth(cs, dst, i);
2286bf215546Sopenharmony_ci         } else {
2287bf215546Sopenharmony_ci            ops->dst_stencil(cs, dst, i);
2288bf215546Sopenharmony_ci         }
2289bf215546Sopenharmony_ci      } else {
2290bf215546Sopenharmony_ci         ops->dst(cs, &dst->view, i, src_format);
2291bf215546Sopenharmony_ci      }
2292bf215546Sopenharmony_ci
2293bf215546Sopenharmony_ci      ops->run(cmd, cs);
2294bf215546Sopenharmony_ci   }
2295bf215546Sopenharmony_ci
2296bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
2297bf215546Sopenharmony_ci
2298bf215546Sopenharmony_ci   trace_end_sysmem_resolve(&cmd->trace, cs, vk_dst_format);
2299bf215546Sopenharmony_ci}
2300bf215546Sopenharmony_ci
2301bf215546Sopenharmony_civoid
2302bf215546Sopenharmony_citu_resolve_sysmem(struct tu_cmd_buffer *cmd,
2303bf215546Sopenharmony_ci                  struct tu_cs *cs,
2304bf215546Sopenharmony_ci                  const struct tu_image_view *src,
2305bf215546Sopenharmony_ci                  const struct tu_image_view *dst,
2306bf215546Sopenharmony_ci                  uint32_t layer_mask,
2307bf215546Sopenharmony_ci                  uint32_t layers,
2308bf215546Sopenharmony_ci                  const VkRect2D *rect)
2309bf215546Sopenharmony_ci{
2310bf215546Sopenharmony_ci   assert(src->image->vk.format == dst->image->vk.format ||
2311bf215546Sopenharmony_ci          (vk_format_is_depth_or_stencil(src->image->vk.format) &&
2312bf215546Sopenharmony_ci           vk_format_is_depth_or_stencil(dst->image->vk.format)));
2313bf215546Sopenharmony_ci
2314bf215546Sopenharmony_ci   bool src_separate_ds = src->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT;
2315bf215546Sopenharmony_ci   bool dst_separate_ds = dst->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT;
2316bf215546Sopenharmony_ci
2317bf215546Sopenharmony_ci   if (dst_separate_ds) {
2318bf215546Sopenharmony_ci      resolve_sysmem(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT,
2319bf215546Sopenharmony_ci                     src, dst, layer_mask, layers, rect,
2320bf215546Sopenharmony_ci                     src_separate_ds, dst_separate_ds);
2321bf215546Sopenharmony_ci      resolve_sysmem(cmd, cs, VK_FORMAT_S8_UINT, VK_FORMAT_S8_UINT,
2322bf215546Sopenharmony_ci                     src, dst, layer_mask, layers, rect,
2323bf215546Sopenharmony_ci                     src_separate_ds, dst_separate_ds);
2324bf215546Sopenharmony_ci   } else {
2325bf215546Sopenharmony_ci      resolve_sysmem(cmd, cs, src->image->vk.format, dst->image->vk.format,
2326bf215546Sopenharmony_ci                     src, dst, layer_mask, layers, rect,
2327bf215546Sopenharmony_ci                     src_separate_ds, dst_separate_ds);
2328bf215546Sopenharmony_ci   }
2329bf215546Sopenharmony_ci}
2330bf215546Sopenharmony_ci
2331bf215546Sopenharmony_cistatic void
2332bf215546Sopenharmony_ciclear_image(struct tu_cmd_buffer *cmd,
2333bf215546Sopenharmony_ci            struct tu_image *image,
2334bf215546Sopenharmony_ci            const VkClearValue *clear_value,
2335bf215546Sopenharmony_ci            const VkImageSubresourceRange *range,
2336bf215546Sopenharmony_ci            VkImageAspectFlags aspect_mask)
2337bf215546Sopenharmony_ci{
2338bf215546Sopenharmony_ci   uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
2339bf215546Sopenharmony_ci   uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range);
2340bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->cs;
2341bf215546Sopenharmony_ci   enum pipe_format format;
2342bf215546Sopenharmony_ci   if (image->vk.format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
2343bf215546Sopenharmony_ci      format = PIPE_FORMAT_R32_UINT;
2344bf215546Sopenharmony_ci   } else {
2345bf215546Sopenharmony_ci      format = tu6_plane_format(image->vk.format,
2346bf215546Sopenharmony_ci                                tu6_plane_index(image->vk.format,
2347bf215546Sopenharmony_ci                                                aspect_mask));
2348bf215546Sopenharmony_ci   }
2349bf215546Sopenharmony_ci
2350bf215546Sopenharmony_ci   if (image->layout[0].depth0 > 1) {
2351bf215546Sopenharmony_ci      assert(layer_count == 1);
2352bf215546Sopenharmony_ci      assert(range->baseArrayLayer == 0);
2353bf215546Sopenharmony_ci   }
2354bf215546Sopenharmony_ci
2355bf215546Sopenharmony_ci   const struct blit_ops *ops = image->layout[0].nr_samples > 1 ? &r3d_ops : &r2d_ops;
2356bf215546Sopenharmony_ci
2357bf215546Sopenharmony_ci   ops->setup(cmd, cs, format, format, aspect_mask, 0, true, image->layout[0].ubwc,
2358bf215546Sopenharmony_ci              image->layout[0].nr_samples);
2359bf215546Sopenharmony_ci   if (image->vk.format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
2360bf215546Sopenharmony_ci      ops->clear_value(cs, PIPE_FORMAT_R9G9B9E5_FLOAT, clear_value);
2361bf215546Sopenharmony_ci   else
2362bf215546Sopenharmony_ci      ops->clear_value(cs, format, clear_value);
2363bf215546Sopenharmony_ci
2364bf215546Sopenharmony_ci   for (unsigned j = 0; j < level_count; j++) {
2365bf215546Sopenharmony_ci      if (image->layout[0].depth0 > 1)
2366bf215546Sopenharmony_ci         layer_count = u_minify(image->layout[0].depth0, range->baseMipLevel + j);
2367bf215546Sopenharmony_ci
2368bf215546Sopenharmony_ci      ops->coords(cs, &(VkOffset2D){}, NULL, &(VkExtent2D) {
2369bf215546Sopenharmony_ci                     u_minify(image->layout[0].width0, range->baseMipLevel + j),
2370bf215546Sopenharmony_ci                     u_minify(image->layout[0].height0, range->baseMipLevel + j)
2371bf215546Sopenharmony_ci                  });
2372bf215546Sopenharmony_ci
2373bf215546Sopenharmony_ci      struct fdl6_view dst;
2374bf215546Sopenharmony_ci      tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) {
2375bf215546Sopenharmony_ci         .aspectMask = aspect_mask,
2376bf215546Sopenharmony_ci         .mipLevel = range->baseMipLevel + j,
2377bf215546Sopenharmony_ci         .baseArrayLayer = range->baseArrayLayer,
2378bf215546Sopenharmony_ci         .layerCount = 1,
2379bf215546Sopenharmony_ci      }, 0, false);
2380bf215546Sopenharmony_ci
2381bf215546Sopenharmony_ci      for (uint32_t i = 0; i < layer_count; i++) {
2382bf215546Sopenharmony_ci         ops->dst(cs, &dst, i, format);
2383bf215546Sopenharmony_ci         ops->run(cmd, cs);
2384bf215546Sopenharmony_ci      }
2385bf215546Sopenharmony_ci   }
2386bf215546Sopenharmony_ci
2387bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
2388bf215546Sopenharmony_ci}
2389bf215546Sopenharmony_ci
2390bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
2391bf215546Sopenharmony_citu_CmdClearColorImage(VkCommandBuffer commandBuffer,
2392bf215546Sopenharmony_ci                      VkImage image_h,
2393bf215546Sopenharmony_ci                      VkImageLayout imageLayout,
2394bf215546Sopenharmony_ci                      const VkClearColorValue *pColor,
2395bf215546Sopenharmony_ci                      uint32_t rangeCount,
2396bf215546Sopenharmony_ci                      const VkImageSubresourceRange *pRanges)
2397bf215546Sopenharmony_ci{
2398bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2399bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, image, image_h);
2400bf215546Sopenharmony_ci
2401bf215546Sopenharmony_ci   for (unsigned i = 0; i < rangeCount; i++)
2402bf215546Sopenharmony_ci      clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT);
2403bf215546Sopenharmony_ci}
2404bf215546Sopenharmony_ci
2405bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
2406bf215546Sopenharmony_citu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
2407bf215546Sopenharmony_ci                             VkImage image_h,
2408bf215546Sopenharmony_ci                             VkImageLayout imageLayout,
2409bf215546Sopenharmony_ci                             const VkClearDepthStencilValue *pDepthStencil,
2410bf215546Sopenharmony_ci                             uint32_t rangeCount,
2411bf215546Sopenharmony_ci                             const VkImageSubresourceRange *pRanges)
2412bf215546Sopenharmony_ci{
2413bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2414bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_image, image, image_h);
2415bf215546Sopenharmony_ci
2416bf215546Sopenharmony_ci   for (unsigned i = 0; i < rangeCount; i++) {
2417bf215546Sopenharmony_ci      const VkImageSubresourceRange *range = &pRanges[i];
2418bf215546Sopenharmony_ci
2419bf215546Sopenharmony_ci      if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
2420bf215546Sopenharmony_ci         /* can't clear both depth and stencil at once, split up the aspect mask */
2421bf215546Sopenharmony_ci         u_foreach_bit(b, range->aspectMask)
2422bf215546Sopenharmony_ci            clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b));
2423bf215546Sopenharmony_ci         continue;
2424bf215546Sopenharmony_ci      }
2425bf215546Sopenharmony_ci
2426bf215546Sopenharmony_ci      clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask);
2427bf215546Sopenharmony_ci   }
2428bf215546Sopenharmony_ci
2429bf215546Sopenharmony_ci   tu_lrz_clear_depth_image(cmd, image, pDepthStencil, rangeCount, pRanges);
2430bf215546Sopenharmony_ci}
2431bf215546Sopenharmony_ci
2432bf215546Sopenharmony_cistatic void
2433bf215546Sopenharmony_citu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
2434bf215546Sopenharmony_ci                            uint32_t attachment_count,
2435bf215546Sopenharmony_ci                            const VkClearAttachment *attachments,
2436bf215546Sopenharmony_ci                            uint32_t rect_count,
2437bf215546Sopenharmony_ci                            const VkClearRect *rects)
2438bf215546Sopenharmony_ci{
2439bf215546Sopenharmony_ci   /* the shader path here is special, it avoids changing MRT/etc state */
2440bf215546Sopenharmony_ci   const struct tu_subpass *subpass = cmd->state.subpass;
2441bf215546Sopenharmony_ci   const uint32_t mrt_count = subpass->color_count;
2442bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->draw_cs;
2443bf215546Sopenharmony_ci   uint32_t clear_value[MAX_RTS][4];
2444bf215546Sopenharmony_ci   float z_clear_val = 0.0f;
2445bf215546Sopenharmony_ci   uint8_t s_clear_val = 0;
2446bf215546Sopenharmony_ci   uint32_t clear_rts = 0, clear_components = 0;
2447bf215546Sopenharmony_ci   bool z_clear = false;
2448bf215546Sopenharmony_ci   bool s_clear = false;
2449bf215546Sopenharmony_ci
2450bf215546Sopenharmony_ci   trace_start_sysmem_clear_all(&cmd->trace, cs);
2451bf215546Sopenharmony_ci
2452bf215546Sopenharmony_ci   for (uint32_t i = 0; i < attachment_count; i++) {
2453bf215546Sopenharmony_ci      uint32_t a;
2454bf215546Sopenharmony_ci      if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
2455bf215546Sopenharmony_ci         uint32_t c = attachments[i].colorAttachment;
2456bf215546Sopenharmony_ci         a = subpass->color_attachments[c].attachment;
2457bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
2458bf215546Sopenharmony_ci            continue;
2459bf215546Sopenharmony_ci
2460bf215546Sopenharmony_ci         clear_rts |= 1 << c;
2461bf215546Sopenharmony_ci         clear_components |= 0xf << (c * 4);
2462bf215546Sopenharmony_ci         memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t));
2463bf215546Sopenharmony_ci      } else {
2464bf215546Sopenharmony_ci         a = subpass->depth_stencil_attachment.attachment;
2465bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
2466bf215546Sopenharmony_ci            continue;
2467bf215546Sopenharmony_ci
2468bf215546Sopenharmony_ci         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
2469bf215546Sopenharmony_ci            z_clear = true;
2470bf215546Sopenharmony_ci            z_clear_val = attachments[i].clearValue.depthStencil.depth;
2471bf215546Sopenharmony_ci         }
2472bf215546Sopenharmony_ci
2473bf215546Sopenharmony_ci         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
2474bf215546Sopenharmony_ci            s_clear = true;
2475bf215546Sopenharmony_ci            s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
2476bf215546Sopenharmony_ci         }
2477bf215546Sopenharmony_ci      }
2478bf215546Sopenharmony_ci   }
2479bf215546Sopenharmony_ci
2480bf215546Sopenharmony_ci   /* We may not know the multisample count if there are no attachments, so
2481bf215546Sopenharmony_ci    * just bail early to avoid corner cases later.
2482bf215546Sopenharmony_ci    */
2483bf215546Sopenharmony_ci   if (clear_rts == 0 && !z_clear && !s_clear)
2484bf215546Sopenharmony_ci      return;
2485bf215546Sopenharmony_ci
2486bf215546Sopenharmony_ci   /* disable all draw states so they don't interfere
2487bf215546Sopenharmony_ci    * TODO: use and re-use draw states
2488bf215546Sopenharmony_ci    * we have to disable draw states individually to preserve
2489bf215546Sopenharmony_ci    * input attachment states, because a secondary command buffer
2490bf215546Sopenharmony_ci    * won't be able to restore them
2491bf215546Sopenharmony_ci    */
2492bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2));
2493bf215546Sopenharmony_ci   for (uint32_t i = 0; i < TU_DRAW_STATE_COUNT; i++) {
2494bf215546Sopenharmony_ci      if (i == TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM ||
2495bf215546Sopenharmony_ci          i == TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM)
2496bf215546Sopenharmony_ci         continue;
2497bf215546Sopenharmony_ci      tu_cs_emit(cs, CP_SET_DRAW_STATE__0_GROUP_ID(i) |
2498bf215546Sopenharmony_ci                     CP_SET_DRAW_STATE__0_DISABLE);
2499bf215546Sopenharmony_ci      tu_cs_emit_qw(cs, 0);
2500bf215546Sopenharmony_ci   }
2501bf215546Sopenharmony_ci   cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE;
2502bf215546Sopenharmony_ci
2503bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
2504bf215546Sopenharmony_ci   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
2505bf215546Sopenharmony_ci                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
2506bf215546Sopenharmony_ci                  0xfc000000);
2507bf215546Sopenharmony_ci   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count));
2508bf215546Sopenharmony_ci
2509bf215546Sopenharmony_ci   r3d_common(cmd, cs, false, clear_rts, false, cmd->state.subpass->samples);
2510bf215546Sopenharmony_ci
2511bf215546Sopenharmony_ci   /* Disable sample counting in order to not affect occlusion query. */
2512bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = true));
2513bf215546Sopenharmony_ci
2514bf215546Sopenharmony_ci   if (cmd->state.prim_generated_query_running_before_rp) {
2515bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, STOP_PRIMITIVE_CTRS);
2516bf215546Sopenharmony_ci   }
2517bf215546Sopenharmony_ci
2518bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
2519bf215546Sopenharmony_ci                   A6XX_SP_FS_RENDER_COMPONENTS(.dword = clear_components));
2520bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
2521bf215546Sopenharmony_ci                   A6XX_RB_RENDER_COMPONENTS(.dword = clear_components));
2522bf215546Sopenharmony_ci
2523bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
2524bf215546Sopenharmony_ci                   A6XX_RB_FS_OUTPUT_CNTL0(),
2525bf215546Sopenharmony_ci                   A6XX_RB_FS_OUTPUT_CNTL1(.mrt = mrt_count));
2526bf215546Sopenharmony_ci
2527bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
2528bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.independent_blend = 1, .sample_mask = 0xffff));
2529bf215546Sopenharmony_ci   for (uint32_t i = 0; i < mrt_count; i++) {
2530bf215546Sopenharmony_ci      tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(i,
2531bf215546Sopenharmony_ci            .component_enable = COND(clear_rts & (1 << i), 0xf)));
2532bf215546Sopenharmony_ci   }
2533bf215546Sopenharmony_ci
2534bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0));
2535bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0));
2536bf215546Sopenharmony_ci
2537bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
2538bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL(
2539bf215546Sopenharmony_ci         .z_test_enable = z_clear,
2540bf215546Sopenharmony_ci         .z_write_enable = z_clear,
2541bf215546Sopenharmony_ci         .zfunc = FUNC_ALWAYS));
2542bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
2543bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL(
2544bf215546Sopenharmony_ci         .stencil_enable = s_clear,
2545bf215546Sopenharmony_ci         .func = FUNC_ALWAYS,
2546bf215546Sopenharmony_ci         .zpass = STENCIL_REPLACE));
2547bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff));
2548bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff));
2549bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val));
2550bf215546Sopenharmony_ci
2551bf215546Sopenharmony_ci   unsigned num_rts = util_bitcount(clear_rts);
2552bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_rts);
2553bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
2554bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
2555bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
2556bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
2557bf215546Sopenharmony_ci                  CP_LOAD_STATE6_0_NUM_UNIT(num_rts));
2558bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
2559bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
2560bf215546Sopenharmony_ci   u_foreach_bit(b, clear_rts)
2561bf215546Sopenharmony_ci      tu_cs_emit_array(cs, clear_value[b], 4);
2562bf215546Sopenharmony_ci
2563bf215546Sopenharmony_ci   for (uint32_t i = 0; i < rect_count; i++) {
2564bf215546Sopenharmony_ci      /* This should be true because of this valid usage for
2565bf215546Sopenharmony_ci       * vkCmdClearAttachments:
2566bf215546Sopenharmony_ci       *
2567bf215546Sopenharmony_ci       *    "If the render pass instance this is recorded in uses multiview,
2568bf215546Sopenharmony_ci       *    then baseArrayLayer must be zero and layerCount must be one"
2569bf215546Sopenharmony_ci       */
2570bf215546Sopenharmony_ci      assert(!subpass->multiview_mask || rects[i].baseArrayLayer == 0);
2571bf215546Sopenharmony_ci
2572bf215546Sopenharmony_ci      /* a630 doesn't support multiview masks, which means that we can't use
2573bf215546Sopenharmony_ci       * the normal multiview path without potentially recompiling a shader
2574bf215546Sopenharmony_ci       * on-demand or using a more complicated variant that takes the mask as
2575bf215546Sopenharmony_ci       * a const. Just use the layered path instead, since it shouldn't be
2576bf215546Sopenharmony_ci       * much worse.
2577bf215546Sopenharmony_ci       */
2578bf215546Sopenharmony_ci      for_each_layer(layer, subpass->multiview_mask, rects[i].layerCount) {
2579bf215546Sopenharmony_ci         r3d_coords_raw(cs, (float[]) {
2580bf215546Sopenharmony_ci            rects[i].rect.offset.x, rects[i].rect.offset.y,
2581bf215546Sopenharmony_ci            z_clear_val, uif(rects[i].baseArrayLayer + layer),
2582bf215546Sopenharmony_ci            rects[i].rect.offset.x + rects[i].rect.extent.width,
2583bf215546Sopenharmony_ci            rects[i].rect.offset.y + rects[i].rect.extent.height,
2584bf215546Sopenharmony_ci            z_clear_val, 1.0f,
2585bf215546Sopenharmony_ci         });
2586bf215546Sopenharmony_ci         r3d_run_vis(cmd, cs);
2587bf215546Sopenharmony_ci      }
2588bf215546Sopenharmony_ci   }
2589bf215546Sopenharmony_ci
2590bf215546Sopenharmony_ci   /* Re-enable sample counting. */
2591bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = false));
2592bf215546Sopenharmony_ci
2593bf215546Sopenharmony_ci   if (cmd->state.prim_generated_query_running_before_rp) {
2594bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, START_PRIMITIVE_CTRS);
2595bf215546Sopenharmony_ci   }
2596bf215546Sopenharmony_ci
2597bf215546Sopenharmony_ci   trace_end_sysmem_clear_all(&cmd->trace,
2598bf215546Sopenharmony_ci                              cs, mrt_count, rect_count);
2599bf215546Sopenharmony_ci}
2600bf215546Sopenharmony_ci
2601bf215546Sopenharmony_cistatic void
2602bf215546Sopenharmony_cipack_gmem_clear_value(const VkClearValue *val, enum pipe_format format, uint32_t clear_value[4])
2603bf215546Sopenharmony_ci{
2604bf215546Sopenharmony_ci   switch (format) {
2605bf215546Sopenharmony_ci   case PIPE_FORMAT_Z24X8_UNORM:
2606bf215546Sopenharmony_ci   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
2607bf215546Sopenharmony_ci      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) |
2608bf215546Sopenharmony_ci                       val->depthStencil.stencil << 24;
2609bf215546Sopenharmony_ci      return;
2610bf215546Sopenharmony_ci   case PIPE_FORMAT_Z16_UNORM:
2611bf215546Sopenharmony_ci      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16);
2612bf215546Sopenharmony_ci      return;
2613bf215546Sopenharmony_ci   case PIPE_FORMAT_Z32_FLOAT:
2614bf215546Sopenharmony_ci      clear_value[0] = fui(val->depthStencil.depth);
2615bf215546Sopenharmony_ci      return;
2616bf215546Sopenharmony_ci   case PIPE_FORMAT_S8_UINT:
2617bf215546Sopenharmony_ci      clear_value[0] = val->depthStencil.stencil;
2618bf215546Sopenharmony_ci      return;
2619bf215546Sopenharmony_ci   default:
2620bf215546Sopenharmony_ci      break;
2621bf215546Sopenharmony_ci   }
2622bf215546Sopenharmony_ci
2623bf215546Sopenharmony_ci   float tmp[4];
2624bf215546Sopenharmony_ci   memcpy(tmp, val->color.float32, 4 * sizeof(float));
2625bf215546Sopenharmony_ci   if (util_format_is_srgb(format)) {
2626bf215546Sopenharmony_ci      for (int i = 0; i < 3; i++)
2627bf215546Sopenharmony_ci         tmp[i] = util_format_linear_to_srgb_float(tmp[i]);
2628bf215546Sopenharmony_ci   }
2629bf215546Sopenharmony_ci
2630bf215546Sopenharmony_ci#define PACK_F(type) util_format_##type##_pack_rgba_float \
2631bf215546Sopenharmony_ci   ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
2632bf215546Sopenharmony_ci   switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
2633bf215546Sopenharmony_ci   case 4:
2634bf215546Sopenharmony_ci      PACK_F(r4g4b4a4_unorm);
2635bf215546Sopenharmony_ci      break;
2636bf215546Sopenharmony_ci   case 5:
2637bf215546Sopenharmony_ci      if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6)
2638bf215546Sopenharmony_ci         PACK_F(r5g6b5_unorm);
2639bf215546Sopenharmony_ci      else
2640bf215546Sopenharmony_ci         PACK_F(r5g5b5a1_unorm);
2641bf215546Sopenharmony_ci      break;
2642bf215546Sopenharmony_ci   case 8:
2643bf215546Sopenharmony_ci      if (util_format_is_snorm(format))
2644bf215546Sopenharmony_ci         PACK_F(r8g8b8a8_snorm);
2645bf215546Sopenharmony_ci      else if (util_format_is_unorm(format))
2646bf215546Sopenharmony_ci         PACK_F(r8g8b8a8_unorm);
2647bf215546Sopenharmony_ci      else
2648bf215546Sopenharmony_ci         pack_int8(clear_value, val->color.uint32);
2649bf215546Sopenharmony_ci      break;
2650bf215546Sopenharmony_ci   case 10:
2651bf215546Sopenharmony_ci      if (util_format_is_pure_integer(format))
2652bf215546Sopenharmony_ci         pack_int10_2(clear_value, val->color.uint32);
2653bf215546Sopenharmony_ci      else
2654bf215546Sopenharmony_ci         PACK_F(r10g10b10a2_unorm);
2655bf215546Sopenharmony_ci      break;
2656bf215546Sopenharmony_ci   case 11:
2657bf215546Sopenharmony_ci      clear_value[0] = float3_to_r11g11b10f(val->color.float32);
2658bf215546Sopenharmony_ci      break;
2659bf215546Sopenharmony_ci   case 16:
2660bf215546Sopenharmony_ci      if (util_format_is_snorm(format))
2661bf215546Sopenharmony_ci         PACK_F(r16g16b16a16_snorm);
2662bf215546Sopenharmony_ci      else if (util_format_is_unorm(format))
2663bf215546Sopenharmony_ci         PACK_F(r16g16b16a16_unorm);
2664bf215546Sopenharmony_ci      else if (util_format_is_float(format))
2665bf215546Sopenharmony_ci         PACK_F(r16g16b16a16_float);
2666bf215546Sopenharmony_ci      else
2667bf215546Sopenharmony_ci         pack_int16(clear_value, val->color.uint32);
2668bf215546Sopenharmony_ci      break;
2669bf215546Sopenharmony_ci   case 32:
2670bf215546Sopenharmony_ci      memcpy(clear_value, val->color.float32, 4 * sizeof(float));
2671bf215546Sopenharmony_ci      break;
2672bf215546Sopenharmony_ci   default:
2673bf215546Sopenharmony_ci      unreachable("unexpected channel size");
2674bf215546Sopenharmony_ci   }
2675bf215546Sopenharmony_ci#undef PACK_F
2676bf215546Sopenharmony_ci}
2677bf215546Sopenharmony_ci
2678bf215546Sopenharmony_cistatic void
2679bf215546Sopenharmony_ciclear_gmem_attachment(struct tu_cmd_buffer *cmd,
2680bf215546Sopenharmony_ci                      struct tu_cs *cs,
2681bf215546Sopenharmony_ci                      enum pipe_format format,
2682bf215546Sopenharmony_ci                      uint8_t clear_mask,
2683bf215546Sopenharmony_ci                      uint32_t gmem_offset,
2684bf215546Sopenharmony_ci                      const VkClearValue *value)
2685bf215546Sopenharmony_ci{
2686bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
2687bf215546Sopenharmony_ci   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format)));
2688bf215546Sopenharmony_ci
2689bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1, .clear_mask = clear_mask));
2690bf215546Sopenharmony_ci
2691bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
2692bf215546Sopenharmony_ci   tu_cs_emit(cs, gmem_offset);
2693bf215546Sopenharmony_ci
2694bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
2695bf215546Sopenharmony_ci   tu_cs_emit(cs, 0);
2696bf215546Sopenharmony_ci
2697bf215546Sopenharmony_ci   uint32_t clear_vals[4] = {};
2698bf215546Sopenharmony_ci   pack_gmem_clear_value(value, format, clear_vals);
2699bf215546Sopenharmony_ci
2700bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
2701bf215546Sopenharmony_ci   tu_cs_emit_array(cs, clear_vals, 4);
2702bf215546Sopenharmony_ci
2703bf215546Sopenharmony_ci   tu6_emit_event_write(cmd, cs, BLIT);
2704bf215546Sopenharmony_ci}
2705bf215546Sopenharmony_ci
2706bf215546Sopenharmony_cistatic void
2707bf215546Sopenharmony_citu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
2708bf215546Sopenharmony_ci                              struct tu_cs *cs,
2709bf215546Sopenharmony_ci                              uint32_t attachment,
2710bf215546Sopenharmony_ci                              VkImageAspectFlags mask,
2711bf215546Sopenharmony_ci                              const VkClearValue *value)
2712bf215546Sopenharmony_ci{
2713bf215546Sopenharmony_ci   const struct tu_render_pass_attachment *att =
2714bf215546Sopenharmony_ci      &cmd->state.pass->attachments[attachment];
2715bf215546Sopenharmony_ci
2716bf215546Sopenharmony_ci   trace_start_gmem_clear(&cmd->trace, cs);
2717bf215546Sopenharmony_ci
2718bf215546Sopenharmony_ci   enum pipe_format format = tu_vk_format_to_pipe_format(att->format);
2719bf215546Sopenharmony_ci   if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
2720bf215546Sopenharmony_ci      if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
2721bf215546Sopenharmony_ci         clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf, tu_attachment_gmem_offset(cmd, att), value);
2722bf215546Sopenharmony_ci      if (mask & VK_IMAGE_ASPECT_STENCIL_BIT)
2723bf215546Sopenharmony_ci         clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf, tu_attachment_gmem_offset_stencil(cmd, att), value);
2724bf215546Sopenharmony_ci      return;
2725bf215546Sopenharmony_ci   }
2726bf215546Sopenharmony_ci
2727bf215546Sopenharmony_ci   clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask),
2728bf215546Sopenharmony_ci                         tu_attachment_gmem_offset(cmd, att), value);
2729bf215546Sopenharmony_ci
2730bf215546Sopenharmony_ci   trace_end_gmem_clear(&cmd->trace, cs, att->format, att->samples);
2731bf215546Sopenharmony_ci}
2732bf215546Sopenharmony_ci
2733bf215546Sopenharmony_cistatic void
2734bf215546Sopenharmony_citu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
2735bf215546Sopenharmony_ci                          uint32_t attachment_count,
2736bf215546Sopenharmony_ci                          const VkClearAttachment *attachments,
2737bf215546Sopenharmony_ci                          uint32_t rect_count,
2738bf215546Sopenharmony_ci                          const VkClearRect *rects)
2739bf215546Sopenharmony_ci{
2740bf215546Sopenharmony_ci   const struct tu_subpass *subpass = cmd->state.subpass;
2741bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->draw_cs;
2742bf215546Sopenharmony_ci
2743bf215546Sopenharmony_ci   if (rect_count > 1)
2744bf215546Sopenharmony_ci      perf_debug(cmd->device, "TODO: Swap tu_clear_gmem_attachments() loop for smaller command stream");
2745bf215546Sopenharmony_ci
2746bf215546Sopenharmony_ci   for (unsigned i = 0; i < rect_count; i++) {
2747bf215546Sopenharmony_ci      unsigned x1 = rects[i].rect.offset.x;
2748bf215546Sopenharmony_ci      unsigned y1 = rects[i].rect.offset.y;
2749bf215546Sopenharmony_ci      unsigned x2 = x1 + rects[i].rect.extent.width - 1;
2750bf215546Sopenharmony_ci      unsigned y2 = y1 + rects[i].rect.extent.height - 1;
2751bf215546Sopenharmony_ci
2752bf215546Sopenharmony_ci      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
2753bf215546Sopenharmony_ci      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
2754bf215546Sopenharmony_ci      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));
2755bf215546Sopenharmony_ci
2756bf215546Sopenharmony_ci      for (unsigned j = 0; j < attachment_count; j++) {
2757bf215546Sopenharmony_ci         uint32_t a;
2758bf215546Sopenharmony_ci         if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
2759bf215546Sopenharmony_ci            a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
2760bf215546Sopenharmony_ci         else
2761bf215546Sopenharmony_ci            a = subpass->depth_stencil_attachment.attachment;
2762bf215546Sopenharmony_ci
2763bf215546Sopenharmony_ci         if (a == VK_ATTACHMENT_UNUSED)
2764bf215546Sopenharmony_ci               continue;
2765bf215546Sopenharmony_ci
2766bf215546Sopenharmony_ci         tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
2767bf215546Sopenharmony_ci                                       &attachments[j].clearValue);
2768bf215546Sopenharmony_ci      }
2769bf215546Sopenharmony_ci   }
2770bf215546Sopenharmony_ci}
2771bf215546Sopenharmony_ci
2772bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
2773bf215546Sopenharmony_citu_CmdClearAttachments(VkCommandBuffer commandBuffer,
2774bf215546Sopenharmony_ci                       uint32_t attachmentCount,
2775bf215546Sopenharmony_ci                       const VkClearAttachment *pAttachments,
2776bf215546Sopenharmony_ci                       uint32_t rectCount,
2777bf215546Sopenharmony_ci                       const VkClearRect *pRects)
2778bf215546Sopenharmony_ci{
2779bf215546Sopenharmony_ci   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
2780bf215546Sopenharmony_ci   struct tu_cs *cs = &cmd->draw_cs;
2781bf215546Sopenharmony_ci
2782bf215546Sopenharmony_ci   /* sysmem path behaves like a draw, note we don't have a way of using different
2783bf215546Sopenharmony_ci    * flushes for sysmem/gmem, so this needs to be outside of the cond_exec
2784bf215546Sopenharmony_ci    */
2785bf215546Sopenharmony_ci   tu_emit_cache_flush_renderpass(cmd, cs);
2786bf215546Sopenharmony_ci
2787bf215546Sopenharmony_ci   for (uint32_t j = 0; j < attachmentCount; j++) {
2788bf215546Sopenharmony_ci      if ((pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) == 0)
2789bf215546Sopenharmony_ci         continue;
2790bf215546Sopenharmony_ci
2791bf215546Sopenharmony_ci      tu_lrz_disable_during_renderpass(cmd);
2792bf215546Sopenharmony_ci   }
2793bf215546Sopenharmony_ci
2794bf215546Sopenharmony_ci   /* vkCmdClearAttachments is supposed to respect the predicate if active. The
2795bf215546Sopenharmony_ci    * easiest way to do this is to always use the 3d path, which always works
2796bf215546Sopenharmony_ci    * even with GMEM because it's just a simple draw using the existing
2797bf215546Sopenharmony_ci    * attachment state.
2798bf215546Sopenharmony_ci    *
2799bf215546Sopenharmony_ci    * Similarly, we also use the 3D path when in a secondary command buffer that
2800bf215546Sopenharmony_ci    * doesn't know the GMEM layout that will be chosen by the primary.
2801bf215546Sopenharmony_ci    */
2802bf215546Sopenharmony_ci   if (cmd->state.predication_active || cmd->state.gmem_layout == TU_GMEM_LAYOUT_COUNT) {
2803bf215546Sopenharmony_ci      tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
2804bf215546Sopenharmony_ci      return;
2805bf215546Sopenharmony_ci   }
2806bf215546Sopenharmony_ci
2807bf215546Sopenharmony_ci   /* If we could skip tile load/stores based on any draws intersecting them at
2808bf215546Sopenharmony_ci    * binning time, then emit the clear as a 3D draw so that it contributes to
2809bf215546Sopenharmony_ci    * that visibility.
2810bf215546Sopenharmony_ci   */
2811bf215546Sopenharmony_ci   const struct tu_subpass *subpass = cmd->state.subpass;
2812bf215546Sopenharmony_ci   for (uint32_t i = 0; i < attachmentCount; i++) {
2813bf215546Sopenharmony_ci      uint32_t a;
2814bf215546Sopenharmony_ci      if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
2815bf215546Sopenharmony_ci         uint32_t c = pAttachments[i].colorAttachment;
2816bf215546Sopenharmony_ci         a = subpass->color_attachments[c].attachment;
2817bf215546Sopenharmony_ci      } else {
2818bf215546Sopenharmony_ci         a = subpass->depth_stencil_attachment.attachment;
2819bf215546Sopenharmony_ci      }
2820bf215546Sopenharmony_ci      if (a != VK_ATTACHMENT_UNUSED) {
2821bf215546Sopenharmony_ci         const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a];
2822bf215546Sopenharmony_ci         if (att->cond_load_allowed || att->cond_store_allowed) {
2823bf215546Sopenharmony_ci            tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
2824bf215546Sopenharmony_ci            return;
2825bf215546Sopenharmony_ci         }
2826bf215546Sopenharmony_ci      }
2827bf215546Sopenharmony_ci   }
2828bf215546Sopenharmony_ci
2829bf215546Sopenharmony_ci   /* Otherwise, emit 2D blits for gmem rendering. */
2830bf215546Sopenharmony_ci   tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
2831bf215546Sopenharmony_ci   tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
2832bf215546Sopenharmony_ci   tu_cond_exec_end(cs);
2833bf215546Sopenharmony_ci
2834bf215546Sopenharmony_ci   tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
2835bf215546Sopenharmony_ci   tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
2836bf215546Sopenharmony_ci   tu_cond_exec_end(cs);
2837bf215546Sopenharmony_ci}
2838bf215546Sopenharmony_ci
2839bf215546Sopenharmony_cistatic void
2840bf215546Sopenharmony_ciclear_sysmem_attachment(struct tu_cmd_buffer *cmd,
2841bf215546Sopenharmony_ci                        struct tu_cs *cs,
2842bf215546Sopenharmony_ci                        VkFormat vk_format,
2843bf215546Sopenharmony_ci                        VkImageAspectFlags clear_mask,
2844bf215546Sopenharmony_ci                        const VkClearValue *value,
2845bf215546Sopenharmony_ci                        uint32_t a,
2846bf215546Sopenharmony_ci                        bool separate_ds)
2847bf215546Sopenharmony_ci{
2848bf215546Sopenharmony_ci   enum pipe_format format = tu_vk_format_to_pipe_format(vk_format);
2849bf215546Sopenharmony_ci   const struct tu_framebuffer *fb = cmd->state.framebuffer;
2850bf215546Sopenharmony_ci   const struct tu_image_view *iview = cmd->state.attachments[a];
2851bf215546Sopenharmony_ci   const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views;
2852bf215546Sopenharmony_ci   const struct blit_ops *ops = &r2d_ops;
2853bf215546Sopenharmony_ci   if (cmd->state.pass->attachments[a].samples > 1)
2854bf215546Sopenharmony_ci      ops = &r3d_ops;
2855bf215546Sopenharmony_ci
2856bf215546Sopenharmony_ci   trace_start_sysmem_clear(&cmd->trace, cs);
2857bf215546Sopenharmony_ci
2858bf215546Sopenharmony_ci   ops->setup(cmd, cs, format, format, clear_mask, 0, true, iview->view.ubwc_enabled,
2859bf215546Sopenharmony_ci              cmd->state.pass->attachments[a].samples);
2860bf215546Sopenharmony_ci   ops->coords(cs, &cmd->state.render_area.offset, NULL,
2861bf215546Sopenharmony_ci               &cmd->state.render_area.extent);
2862bf215546Sopenharmony_ci   ops->clear_value(cs, format, value);
2863bf215546Sopenharmony_ci
2864bf215546Sopenharmony_ci   for_each_layer(i, clear_views, fb->layers) {
2865bf215546Sopenharmony_ci      if (separate_ds) {
2866bf215546Sopenharmony_ci         if (vk_format == VK_FORMAT_D32_SFLOAT) {
2867bf215546Sopenharmony_ci            ops->dst_depth(cs, iview, i);
2868bf215546Sopenharmony_ci         } else {
2869bf215546Sopenharmony_ci            ops->dst_stencil(cs, iview, i);
2870bf215546Sopenharmony_ci         }
2871bf215546Sopenharmony_ci      } else {
2872bf215546Sopenharmony_ci         ops->dst(cs, &iview->view, i, format);
2873bf215546Sopenharmony_ci      }
2874bf215546Sopenharmony_ci      ops->run(cmd, cs);
2875bf215546Sopenharmony_ci   }
2876bf215546Sopenharmony_ci
2877bf215546Sopenharmony_ci   ops->teardown(cmd, cs);
2878bf215546Sopenharmony_ci
2879bf215546Sopenharmony_ci   trace_end_sysmem_clear(&cmd->trace, cs,
2880bf215546Sopenharmony_ci                          vk_format, ops == &r3d_ops,
2881bf215546Sopenharmony_ci                          cmd->state.pass->attachments[a].samples);
2882bf215546Sopenharmony_ci}
2883bf215546Sopenharmony_ci
2884bf215546Sopenharmony_civoid
2885bf215546Sopenharmony_citu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
2886bf215546Sopenharmony_ci                           struct tu_cs *cs,
2887bf215546Sopenharmony_ci                           uint32_t a,
2888bf215546Sopenharmony_ci                           const VkClearValue *value)
2889bf215546Sopenharmony_ci{
2890bf215546Sopenharmony_ci   const struct tu_render_pass_attachment *attachment =
2891bf215546Sopenharmony_ci      &cmd->state.pass->attachments[a];
2892bf215546Sopenharmony_ci
2893bf215546Sopenharmony_ci   if (!attachment->clear_mask)
2894bf215546Sopenharmony_ci      return;
2895bf215546Sopenharmony_ci
2896bf215546Sopenharmony_ci   if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
2897bf215546Sopenharmony_ci      if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
2898bf215546Sopenharmony_ci         clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT,
2899bf215546Sopenharmony_ci                                 value, a, true);
2900bf215546Sopenharmony_ci      }
2901bf215546Sopenharmony_ci      if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
2902bf215546Sopenharmony_ci         clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT,
2903bf215546Sopenharmony_ci                                 value, a, true);
2904bf215546Sopenharmony_ci      }
2905bf215546Sopenharmony_ci   } else {
2906bf215546Sopenharmony_ci      clear_sysmem_attachment(cmd, cs, attachment->format, attachment->clear_mask,
2907bf215546Sopenharmony_ci                              value, a, false);
2908bf215546Sopenharmony_ci   }
2909bf215546Sopenharmony_ci
2910bf215546Sopenharmony_ci   /* The spec doesn't explicitly say, but presumably the initial renderpass
2911bf215546Sopenharmony_ci    * clear is considered part of the renderpass, and therefore barriers
2912bf215546Sopenharmony_ci    * aren't required inside the subpass/renderpass.  Therefore we need to
2913bf215546Sopenharmony_ci    * flush CCU color into CCU depth here, just like with
2914bf215546Sopenharmony_ci    * vkCmdClearAttachments(). Note that because this only happens at the
2915bf215546Sopenharmony_ci    * beginning of a renderpass, and renderpass writes are considered
2916bf215546Sopenharmony_ci    * "incoherent", we shouldn't have to worry about syncing depth into color
2917bf215546Sopenharmony_ci    * beforehand as depth should already be flushed.
2918bf215546Sopenharmony_ci    */
2919bf215546Sopenharmony_ci   if (vk_format_is_depth_or_stencil(attachment->format)) {
2920bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
2921bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS);
2922bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
2923bf215546Sopenharmony_ci   } else {
2924bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
2925bf215546Sopenharmony_ci      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
2926bf215546Sopenharmony_ci   }
2927bf215546Sopenharmony_ci
2928bf215546Sopenharmony_ci   if (cmd->device->physical_device->info->a6xx.has_ccu_flush_bug)
2929bf215546Sopenharmony_ci      tu_cs_emit_wfi(cs);
2930bf215546Sopenharmony_ci}
2931bf215546Sopenharmony_ci
2932bf215546Sopenharmony_civoid
2933bf215546Sopenharmony_citu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
2934bf215546Sopenharmony_ci                         struct tu_cs *cs,
2935bf215546Sopenharmony_ci                         uint32_t a,
2936bf215546Sopenharmony_ci                         const VkClearValue *value)
2937bf215546Sopenharmony_ci{
2938bf215546Sopenharmony_ci   const struct tu_render_pass_attachment *attachment =
2939bf215546Sopenharmony_ci      &cmd->state.pass->attachments[a];
2940bf215546Sopenharmony_ci
2941bf215546Sopenharmony_ci   if (!attachment->clear_mask)
2942bf215546Sopenharmony_ci      return;
2943bf215546Sopenharmony_ci
2944bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
2945bf215546Sopenharmony_ci
2946bf215546Sopenharmony_ci   tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask, value);
2947bf215546Sopenharmony_ci}
2948bf215546Sopenharmony_ci
2949bf215546Sopenharmony_cistatic void
2950bf215546Sopenharmony_citu_emit_blit(struct tu_cmd_buffer *cmd,
2951bf215546Sopenharmony_ci             struct tu_cs *cs,
2952bf215546Sopenharmony_ci             const struct tu_image_view *iview,
2953bf215546Sopenharmony_ci             const struct tu_render_pass_attachment *attachment,
2954bf215546Sopenharmony_ci             bool resolve,
2955bf215546Sopenharmony_ci             bool separate_stencil)
2956bf215546Sopenharmony_ci{
2957bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
2958bf215546Sopenharmony_ci                   A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));
2959bf215546Sopenharmony_ci
2960bf215546Sopenharmony_ci   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(
2961bf215546Sopenharmony_ci      .unk0 = !resolve,
2962bf215546Sopenharmony_ci      .gmem = !resolve,
2963bf215546Sopenharmony_ci      .sample_0 = vk_format_is_int(attachment->format) ||
2964bf215546Sopenharmony_ci         vk_format_is_depth_or_stencil(attachment->format)));
2965bf215546Sopenharmony_ci
2966bf215546Sopenharmony_ci   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
2967bf215546Sopenharmony_ci   if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
2968bf215546Sopenharmony_ci      if (!separate_stencil) {
2969bf215546Sopenharmony_ci         tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO));
2970bf215546Sopenharmony_ci         tu_cs_emit_qw(cs, iview->depth_base_addr);
2971bf215546Sopenharmony_ci         tu_cs_emit(cs, iview->depth_PITCH);
2972bf215546Sopenharmony_ci
2973bf215546Sopenharmony_ci         tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
2974bf215546Sopenharmony_ci         tu_cs_image_flag_ref(cs, &iview->view, 0);
2975bf215546Sopenharmony_ci      } else {
2976bf215546Sopenharmony_ci         tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
2977bf215546Sopenharmony_ci         tu_cs_emit_qw(cs, iview->stencil_base_addr);
2978bf215546Sopenharmony_ci         tu_cs_emit(cs, iview->stencil_PITCH);
2979bf215546Sopenharmony_ci      }
2980bf215546Sopenharmony_ci   } else {
2981bf215546Sopenharmony_ci      tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO);
2982bf215546Sopenharmony_ci      tu_cs_image_ref_2d(cs, &iview->view, 0, false);
2983bf215546Sopenharmony_ci
2984bf215546Sopenharmony_ci      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
2985bf215546Sopenharmony_ci      tu_cs_image_flag_ref(cs, &iview->view, 0);
2986bf215546Sopenharmony_ci   }
2987bf215546Sopenharmony_ci
2988bf215546Sopenharmony_ci   if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && separate_stencil) {
2989bf215546Sopenharmony_ci         tu_cs_emit_regs(cs,
2990bf215546Sopenharmony_ci                        A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset_stencil(cmd, attachment)));
2991bf215546Sopenharmony_ci   } else {
2992bf215546Sopenharmony_ci      tu_cs_emit_regs(cs,
2993bf215546Sopenharmony_ci                     A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset(cmd, attachment)));
2994bf215546Sopenharmony_ci   }
2995bf215546Sopenharmony_ci
2996bf215546Sopenharmony_ci   tu6_emit_event_write(cmd, cs, BLIT);
2997bf215546Sopenharmony_ci}
2998bf215546Sopenharmony_ci
2999bf215546Sopenharmony_cistatic bool
3000bf215546Sopenharmony_ciblit_can_resolve(VkFormat format)
3001bf215546Sopenharmony_ci{
3002bf215546Sopenharmony_ci   const struct util_format_description *desc = vk_format_description(format);
3003bf215546Sopenharmony_ci
3004bf215546Sopenharmony_ci   /* blit event can only do resolve for simple cases:
3005bf215546Sopenharmony_ci    * averaging samples as unsigned integers or choosing only one sample
3006bf215546Sopenharmony_ci    */
3007bf215546Sopenharmony_ci   if (vk_format_is_snorm(format) || vk_format_is_srgb(format))
3008bf215546Sopenharmony_ci      return false;
3009bf215546Sopenharmony_ci
3010bf215546Sopenharmony_ci   /* can't do formats with larger channel sizes
3011bf215546Sopenharmony_ci    * note: this includes all float formats
3012bf215546Sopenharmony_ci    * note2: single channel integer formats seem OK
3013bf215546Sopenharmony_ci    */
3014bf215546Sopenharmony_ci   if (desc->channel[0].size > 10)
3015bf215546Sopenharmony_ci      return false;
3016bf215546Sopenharmony_ci
3017bf215546Sopenharmony_ci   switch (format) {
3018bf215546Sopenharmony_ci   /* for unknown reasons blit event can't msaa resolve these formats when tiled
3019bf215546Sopenharmony_ci    * likely related to these formats having different layout from other cpp=2 formats
3020bf215546Sopenharmony_ci    */
3021bf215546Sopenharmony_ci   case VK_FORMAT_R8G8_UNORM:
3022bf215546Sopenharmony_ci   case VK_FORMAT_R8G8_UINT:
3023bf215546Sopenharmony_ci   case VK_FORMAT_R8G8_SINT:
3024bf215546Sopenharmony_ci   /* TODO: this one should be able to work? */
3025bf215546Sopenharmony_ci   case VK_FORMAT_D24_UNORM_S8_UINT:
3026bf215546Sopenharmony_ci      return false;
3027bf215546Sopenharmony_ci   default:
3028bf215546Sopenharmony_ci      break;
3029bf215546Sopenharmony_ci   }
3030bf215546Sopenharmony_ci
3031bf215546Sopenharmony_ci   return true;
3032bf215546Sopenharmony_ci}
3033bf215546Sopenharmony_ci
3034bf215546Sopenharmony_cistatic void
3035bf215546Sopenharmony_citu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd,
3036bf215546Sopenharmony_ci                              struct tu_cs *cs, bool load)
3037bf215546Sopenharmony_ci{
3038bf215546Sopenharmony_ci   tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
3039bf215546Sopenharmony_ci
3040bf215546Sopenharmony_ci   if (!unlikely(cmd->device->physical_device->instance->debug_flags &
3041bf215546Sopenharmony_ci                 TU_DEBUG_LOG_SKIP_GMEM_OPS))
3042bf215546Sopenharmony_ci      return;
3043bf215546Sopenharmony_ci
3044bf215546Sopenharmony_ci   uint64_t result_iova;
3045bf215546Sopenharmony_ci   if (load)
3046bf215546Sopenharmony_ci      result_iova = global_iova(cmd, dbg_gmem_taken_loads);
3047bf215546Sopenharmony_ci   else
3048bf215546Sopenharmony_ci      result_iova = global_iova(cmd, dbg_gmem_taken_stores);
3049bf215546Sopenharmony_ci
3050bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7);
3051bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B);
3052bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, result_iova);
3053bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, result_iova);
3054bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, global_iova(cmd, dbg_one));
3055bf215546Sopenharmony_ci}
3056bf215546Sopenharmony_ci
3057bf215546Sopenharmony_cistatic void
3058bf215546Sopenharmony_citu_end_load_store_cond_exec(struct tu_cmd_buffer *cmd,
3059bf215546Sopenharmony_ci                            struct tu_cs *cs, bool load)
3060bf215546Sopenharmony_ci{
3061bf215546Sopenharmony_ci   tu_cond_exec_end(cs);
3062bf215546Sopenharmony_ci
3063bf215546Sopenharmony_ci   if (!unlikely(cmd->device->physical_device->instance->debug_flags &
3064bf215546Sopenharmony_ci                 TU_DEBUG_LOG_SKIP_GMEM_OPS))
3065bf215546Sopenharmony_ci      return;
3066bf215546Sopenharmony_ci
3067bf215546Sopenharmony_ci   uint64_t result_iova;
3068bf215546Sopenharmony_ci   if (load)
3069bf215546Sopenharmony_ci      result_iova = global_iova(cmd, dbg_gmem_total_loads);
3070bf215546Sopenharmony_ci   else
3071bf215546Sopenharmony_ci      result_iova = global_iova(cmd, dbg_gmem_total_stores);
3072bf215546Sopenharmony_ci
3073bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7);
3074bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B);
3075bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, result_iova);
3076bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, result_iova);
3077bf215546Sopenharmony_ci   tu_cs_emit_qw(cs, global_iova(cmd, dbg_one));
3078bf215546Sopenharmony_ci}
3079bf215546Sopenharmony_ci
3080bf215546Sopenharmony_civoid
3081bf215546Sopenharmony_citu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
3082bf215546Sopenharmony_ci                        struct tu_cs *cs,
3083bf215546Sopenharmony_ci                        uint32_t a,
3084bf215546Sopenharmony_ci                        bool cond_exec_allowed,
3085bf215546Sopenharmony_ci                        bool force_load)
3086bf215546Sopenharmony_ci{
3087bf215546Sopenharmony_ci   const struct tu_image_view *iview = cmd->state.attachments[a];
3088bf215546Sopenharmony_ci   const struct tu_render_pass_attachment *attachment =
3089bf215546Sopenharmony_ci      &cmd->state.pass->attachments[a];
3090bf215546Sopenharmony_ci
3091bf215546Sopenharmony_ci   bool load_common = attachment->load || force_load;
3092bf215546Sopenharmony_ci   bool load_stencil =
3093bf215546Sopenharmony_ci      attachment->load_stencil ||
3094bf215546Sopenharmony_ci      (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load);
3095bf215546Sopenharmony_ci
3096bf215546Sopenharmony_ci   if (!load_common && !load_stencil)
3097bf215546Sopenharmony_ci      return;
3098bf215546Sopenharmony_ci
3099bf215546Sopenharmony_ci   trace_start_gmem_load(&cmd->trace, cs);
3100bf215546Sopenharmony_ci
3101bf215546Sopenharmony_ci   /* If attachment will be cleared by vkCmdClearAttachments - it is likely
3102bf215546Sopenharmony_ci    * that it would be partially cleared, and since it is done by 2d blit
3103bf215546Sopenharmony_ci    * it doesn't produce geometry, so we have to unconditionally load.
3104bf215546Sopenharmony_ci    *
3105bf215546Sopenharmony_ci    * To simplify conditions treat partially cleared separate DS as fully
3106bf215546Sopenharmony_ci    * cleared and don't emit cond_exec.
3107bf215546Sopenharmony_ci    */
3108bf215546Sopenharmony_ci   bool cond_exec = cond_exec_allowed && attachment->cond_load_allowed;
3109bf215546Sopenharmony_ci   if (cond_exec)
3110bf215546Sopenharmony_ci      tu_begin_load_store_cond_exec(cmd, cs, true);
3111bf215546Sopenharmony_ci
3112bf215546Sopenharmony_ci   if (load_common)
3113bf215546Sopenharmony_ci      tu_emit_blit(cmd, cs, iview, attachment, false, false);
3114bf215546Sopenharmony_ci
3115bf215546Sopenharmony_ci   if (load_stencil)
3116bf215546Sopenharmony_ci      tu_emit_blit(cmd, cs, iview, attachment, false, true);
3117bf215546Sopenharmony_ci
3118bf215546Sopenharmony_ci   if (cond_exec)
3119bf215546Sopenharmony_ci      tu_end_load_store_cond_exec(cmd, cs, true);
3120bf215546Sopenharmony_ci
3121bf215546Sopenharmony_ci   trace_end_gmem_load(&cmd->trace, cs, attachment->format, force_load);
3122bf215546Sopenharmony_ci}
3123bf215546Sopenharmony_ci
3124bf215546Sopenharmony_cistatic void
3125bf215546Sopenharmony_cistore_cp_blit(struct tu_cmd_buffer *cmd,
3126bf215546Sopenharmony_ci              struct tu_cs *cs,
3127bf215546Sopenharmony_ci              const struct tu_image_view *iview,
3128bf215546Sopenharmony_ci              uint32_t samples,
3129bf215546Sopenharmony_ci              bool separate_stencil,
3130bf215546Sopenharmony_ci              enum pipe_format src_format,
3131bf215546Sopenharmony_ci              enum pipe_format dst_format,
3132bf215546Sopenharmony_ci              uint32_t gmem_offset,
3133bf215546Sopenharmony_ci              uint32_t cpp)
3134bf215546Sopenharmony_ci{
3135bf215546Sopenharmony_ci   r2d_setup_common(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
3136bf215546Sopenharmony_ci                    iview->view.ubwc_enabled, true);
3137bf215546Sopenharmony_ci
3138bf215546Sopenharmony_ci   if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
3139bf215546Sopenharmony_ci      if (!separate_stencil) {
3140bf215546Sopenharmony_ci         r2d_dst_depth(cs, iview, 0);
3141bf215546Sopenharmony_ci      } else {
3142bf215546Sopenharmony_ci         r2d_dst_stencil(cs, iview, 0);
3143bf215546Sopenharmony_ci      }
3144bf215546Sopenharmony_ci   } else {
3145bf215546Sopenharmony_ci      r2d_dst(cs, &iview->view, 0, src_format);
3146bf215546Sopenharmony_ci   }
3147bf215546Sopenharmony_ci
3148bf215546Sopenharmony_ci   enum a6xx_format fmt = tu6_format_texture(src_format, TILE6_2).fmt;
3149bf215546Sopenharmony_ci   fixup_src_format(&src_format, dst_format, &fmt);
3150bf215546Sopenharmony_ci
3151bf215546Sopenharmony_ci   tu_cs_emit_regs(cs,
3152bf215546Sopenharmony_ci                   A6XX_SP_PS_2D_SRC_INFO(
3153bf215546Sopenharmony_ci                      .color_format = fmt,
3154bf215546Sopenharmony_ci                      .color_swap = WZYX,
3155bf215546Sopenharmony_ci                      .tile_mode = TILE6_2,
3156bf215546Sopenharmony_ci                      .srgb = util_format_is_srgb(src_format),
3157bf215546Sopenharmony_ci                      .samples = tu_msaa_samples(samples),
3158bf215546Sopenharmony_ci                      .samples_average = !util_format_is_pure_integer(dst_format) &&
3159bf215546Sopenharmony_ci                                         !util_format_is_depth_or_stencil(dst_format),
3160bf215546Sopenharmony_ci                      .unk20 = 1,
3161bf215546Sopenharmony_ci                      .unk22 = 1),
3162bf215546Sopenharmony_ci                   /* note: src size does not matter when not scaling */
3163bf215546Sopenharmony_ci                   A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff),
3164bf215546Sopenharmony_ci                   A6XX_SP_PS_2D_SRC(.qword = cmd->device->physical_device->gmem_base + gmem_offset),
3165bf215546Sopenharmony_ci                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.tiling->tile0.width * cpp));
3166bf215546Sopenharmony_ci
3167bf215546Sopenharmony_ci   /* sync GMEM writes with CACHE. */
3168bf215546Sopenharmony_ci   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
3169bf215546Sopenharmony_ci
3170bf215546Sopenharmony_ci   /* Wait for CACHE_INVALIDATE to land */
3171bf215546Sopenharmony_ci   tu_cs_emit_wfi(cs);
3172bf215546Sopenharmony_ci
3173bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
3174bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
3175bf215546Sopenharmony_ci
3176bf215546Sopenharmony_ci   /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
3177bf215546Sopenharmony_ci    * sysmem, and we generally assume that GMEM renderpasses leave their
3178bf215546Sopenharmony_ci    * results in sysmem, so we need to flush manually here.
3179bf215546Sopenharmony_ci    */
3180bf215546Sopenharmony_ci   tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
3181bf215546Sopenharmony_ci}
3182bf215546Sopenharmony_ci
3183bf215546Sopenharmony_cistatic void
3184bf215546Sopenharmony_cistore_3d_blit(struct tu_cmd_buffer *cmd,
3185bf215546Sopenharmony_ci              struct tu_cs *cs,
3186bf215546Sopenharmony_ci              const struct tu_image_view *iview,
3187bf215546Sopenharmony_ci              uint32_t dst_samples,
3188bf215546Sopenharmony_ci              bool separate_stencil,
3189bf215546Sopenharmony_ci              enum pipe_format src_format,
3190bf215546Sopenharmony_ci              enum pipe_format dst_format,
3191bf215546Sopenharmony_ci              const VkRect2D *render_area,
3192bf215546Sopenharmony_ci              uint32_t gmem_offset,
3193bf215546Sopenharmony_ci              uint32_t cpp)
3194bf215546Sopenharmony_ci{
3195bf215546Sopenharmony_ci   /* RB_BIN_CONTROL/GRAS_BIN_CONTROL are normally only set once and they
3196bf215546Sopenharmony_ci    * aren't set until we know whether we're HW binning or not, and we want to
3197bf215546Sopenharmony_ci    * avoid a dependence on that here to be able to store attachments before
3198bf215546Sopenharmony_ci    * the end of the renderpass in the future. Use the scratch space to
3199bf215546Sopenharmony_ci    * save/restore them dynamically.
3200bf215546Sopenharmony_ci    */
3201bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1);
3202bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A6XX_RB_BIN_CONTROL) |
3203bf215546Sopenharmony_ci                  CP_REG_TO_SCRATCH_0_SCRATCH(0) |
3204bf215546Sopenharmony_ci                  CP_REG_TO_SCRATCH_0_CNT(1 - 1));
3205bf215546Sopenharmony_ci
3206bf215546Sopenharmony_ci   r3d_setup(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
3207bf215546Sopenharmony_ci             iview->view.ubwc_enabled, dst_samples);
3208bf215546Sopenharmony_ci
3209bf215546Sopenharmony_ci   r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
3210bf215546Sopenharmony_ci
3211bf215546Sopenharmony_ci   if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
3212bf215546Sopenharmony_ci      if (!separate_stencil) {
3213bf215546Sopenharmony_ci         r3d_dst_depth(cs, iview, 0);
3214bf215546Sopenharmony_ci      } else {
3215bf215546Sopenharmony_ci         r3d_dst_stencil(cs, iview, 0);
3216bf215546Sopenharmony_ci      }
3217bf215546Sopenharmony_ci   } else {
3218bf215546Sopenharmony_ci      r3d_dst(cs, &iview->view, 0, src_format);
3219bf215546Sopenharmony_ci   }
3220bf215546Sopenharmony_ci
3221bf215546Sopenharmony_ci   r3d_src_gmem(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp);
3222bf215546Sopenharmony_ci
3223bf215546Sopenharmony_ci   /* sync GMEM writes with CACHE. */
3224bf215546Sopenharmony_ci   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);
3225bf215546Sopenharmony_ci
3226bf215546Sopenharmony_ci   /* Wait for CACHE_INVALIDATE to land */
3227bf215546Sopenharmony_ci   tu_cs_emit_wfi(cs);
3228bf215546Sopenharmony_ci
3229bf215546Sopenharmony_ci   r3d_run(cmd, cs);
3230bf215546Sopenharmony_ci
3231bf215546Sopenharmony_ci   r3d_teardown(cmd, cs);
3232bf215546Sopenharmony_ci
3233bf215546Sopenharmony_ci   /* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
3234bf215546Sopenharmony_ci    * sysmem, and we generally assume that GMEM renderpasses leave their
3235bf215546Sopenharmony_ci    * results in sysmem, so we need to flush manually here. The 3d blit path
3236bf215546Sopenharmony_ci    * writes to depth images as a color RT, so there's no need to flush depth.
3237bf215546Sopenharmony_ci    */
3238bf215546Sopenharmony_ci   tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
3239bf215546Sopenharmony_ci
3240bf215546Sopenharmony_ci   /* Restore RB_BIN_CONTROL/GRAS_BIN_CONTROL saved above. */
3241bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
3242bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_RB_BIN_CONTROL) |
3243bf215546Sopenharmony_ci                  CP_SCRATCH_TO_REG_0_SCRATCH(0) |
3244bf215546Sopenharmony_ci                  CP_SCRATCH_TO_REG_0_CNT(1 - 1));
3245bf215546Sopenharmony_ci
3246bf215546Sopenharmony_ci   tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1);
3247bf215546Sopenharmony_ci   tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_GRAS_BIN_CONTROL) |
3248bf215546Sopenharmony_ci                  CP_SCRATCH_TO_REG_0_SCRATCH(0) |
3249bf215546Sopenharmony_ci                  CP_SCRATCH_TO_REG_0_CNT(1 - 1));
3250bf215546Sopenharmony_ci}
3251bf215546Sopenharmony_ci
3252bf215546Sopenharmony_cistatic bool
3253bf215546Sopenharmony_citu_attachment_store_unaligned(struct tu_cmd_buffer *cmd, uint32_t a)
3254bf215546Sopenharmony_ci{
3255bf215546Sopenharmony_ci   struct tu_physical_device *phys_dev = cmd->device->physical_device;
3256bf215546Sopenharmony_ci   const struct tu_image_view *iview = cmd->state.attachments[a];
3257bf215546Sopenharmony_ci   const VkRect2D *render_area = &cmd->state.render_area;
3258bf215546Sopenharmony_ci
3259bf215546Sopenharmony_ci   /* Unaligned store is incredibly rare in CTS, we have to force it to test. */
3260bf215546Sopenharmony_ci   if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_UNALIGNED_STORE))
3261bf215546Sopenharmony_ci      return true;
3262bf215546Sopenharmony_ci
3263bf215546Sopenharmony_ci   uint32_t x1 = render_area->offset.x;
3264bf215546Sopenharmony_ci   uint32_t y1 = render_area->offset.y;
3265bf215546Sopenharmony_ci   uint32_t x2 = x1 + render_area->extent.width;
3266bf215546Sopenharmony_ci   uint32_t y2 = y1 + render_area->extent.height;
3267bf215546Sopenharmony_ci   /* x2/y2 can be unaligned if equal to the size of the image, since it will
3268bf215546Sopenharmony_ci    * write into padding space. The one exception is linear levels which don't
3269bf215546Sopenharmony_ci    * have the required y padding in the layout (except for the last level)
3270bf215546Sopenharmony_ci    */
3271bf215546Sopenharmony_ci   bool need_y2_align =
3272bf215546Sopenharmony_ci      y2 != iview->view.height || iview->view.need_y2_align;
3273bf215546Sopenharmony_ci
3274bf215546Sopenharmony_ci   return (x1 % phys_dev->info->gmem_align_w ||
3275bf215546Sopenharmony_ci           (x2 % phys_dev->info->gmem_align_w && x2 != iview->view.width) ||
3276bf215546Sopenharmony_ci           y1 % phys_dev->info->gmem_align_h ||
3277bf215546Sopenharmony_ci           (y2 % phys_dev->info->gmem_align_h && need_y2_align));
3278bf215546Sopenharmony_ci}
3279bf215546Sopenharmony_ci
3280bf215546Sopenharmony_ci/* Choose the GMEM layout (use the CCU space or not) based on whether the
3281bf215546Sopenharmony_ci * current attachments will need.  This has to happen at vkBeginRenderPass()
3282bf215546Sopenharmony_ci * time because tu_attachment_store_unaligned() looks at the image views, which
3283bf215546Sopenharmony_ci * are only available at that point.  This should match the logic for the
3284bf215546Sopenharmony_ci * !unaligned case in tu_store_gmem_attachment().
3285bf215546Sopenharmony_ci */
3286bf215546Sopenharmony_civoid
3287bf215546Sopenharmony_citu_choose_gmem_layout(struct tu_cmd_buffer *cmd)
3288bf215546Sopenharmony_ci{
3289bf215546Sopenharmony_ci   cmd->state.gmem_layout = TU_GMEM_LAYOUT_FULL;
3290bf215546Sopenharmony_ci
3291bf215546Sopenharmony_ci   for (unsigned i = 0; i < cmd->state.pass->attachment_count; i++) {
3292bf215546Sopenharmony_ci      if (!cmd->state.attachments[i])
3293bf215546Sopenharmony_ci         continue;
3294bf215546Sopenharmony_ci
3295bf215546Sopenharmony_ci      struct tu_render_pass_attachment *att =
3296bf215546Sopenharmony_ci         &cmd->state.pass->attachments[i];
3297bf215546Sopenharmony_ci      if ((att->store || att->store_stencil) &&
3298bf215546Sopenharmony_ci          tu_attachment_store_unaligned(cmd, i))
3299bf215546Sopenharmony_ci         cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU;
3300bf215546Sopenharmony_ci      if (att->will_be_resolved && !blit_can_resolve(att->format))
3301bf215546Sopenharmony_ci         cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU;
3302bf215546Sopenharmony_ci   }
3303bf215546Sopenharmony_ci
3304bf215546Sopenharmony_ci   cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout];
3305bf215546Sopenharmony_ci}
3306bf215546Sopenharmony_ci
3307bf215546Sopenharmony_civoid
3308bf215546Sopenharmony_citu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
3309bf215546Sopenharmony_ci                         struct tu_cs *cs,
3310bf215546Sopenharmony_ci                         uint32_t a,
3311bf215546Sopenharmony_ci                         uint32_t gmem_a,
3312bf215546Sopenharmony_ci                         bool cond_exec_allowed)
3313bf215546Sopenharmony_ci{
3314bf215546Sopenharmony_ci   const VkRect2D *render_area = &cmd->state.render_area;
3315bf215546Sopenharmony_ci   struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
3316bf215546Sopenharmony_ci   const struct tu_image_view *iview = cmd->state.attachments[a];
3317bf215546Sopenharmony_ci   struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];
3318bf215546Sopenharmony_ci
3319bf215546Sopenharmony_ci   if (!dst->store && !dst->store_stencil)
3320bf215546Sopenharmony_ci      return;
3321bf215546Sopenharmony_ci
3322bf215546Sopenharmony_ci   /* Unconditional store should happen only if attachment was cleared,
3323bf215546Sopenharmony_ci    * which could have happened either by load_op or via vkCmdClearAttachments.
3324bf215546Sopenharmony_ci    */
3325bf215546Sopenharmony_ci   bool cond_exec = cond_exec_allowed && src->cond_store_allowed;
3326bf215546Sopenharmony_ci   if (cond_exec) {
3327bf215546Sopenharmony_ci      tu_begin_load_store_cond_exec(cmd, cs, false);
3328bf215546Sopenharmony_ci   }
3329bf215546Sopenharmony_ci
3330bf215546Sopenharmony_ci   bool unaligned = tu_attachment_store_unaligned(cmd, a);
3331bf215546Sopenharmony_ci
3332bf215546Sopenharmony_ci   /* D32_SFLOAT_S8_UINT is quite special format: it has two planes,
3333bf215546Sopenharmony_ci    * one for depth and other for stencil. When resolving a MSAA
3334bf215546Sopenharmony_ci    * D32_SFLOAT_S8_UINT to S8_UINT, we need to take that into account.
3335bf215546Sopenharmony_ci    */
3336bf215546Sopenharmony_ci   bool resolve_d32s8_s8 =
3337bf215546Sopenharmony_ci      src->format == VK_FORMAT_D32_SFLOAT_S8_UINT &&
3338bf215546Sopenharmony_ci      dst->format == VK_FORMAT_S8_UINT;
3339bf215546Sopenharmony_ci
3340bf215546Sopenharmony_ci   /* The fast path doesn't support picking out the last component of a D24S8
3341bf215546Sopenharmony_ci    * texture reinterpreted as RGBA8_UNORM.
3342bf215546Sopenharmony_ci    */
3343bf215546Sopenharmony_ci   bool resolve_d24s8_s8 =
3344bf215546Sopenharmony_ci      src->format == VK_FORMAT_D24_UNORM_S8_UINT &&
3345bf215546Sopenharmony_ci      dst->format == VK_FORMAT_S8_UINT;
3346bf215546Sopenharmony_ci
3347bf215546Sopenharmony_ci   bool store_common = dst->store && !resolve_d32s8_s8;
3348bf215546Sopenharmony_ci   bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8;
3349bf215546Sopenharmony_ci
3350bf215546Sopenharmony_ci   trace_start_gmem_store(&cmd->trace, cs);
3351bf215546Sopenharmony_ci
3352bf215546Sopenharmony_ci   /* use fast path when render area is aligned, except for unsupported resolve cases */
3353bf215546Sopenharmony_ci   if (!unaligned && !resolve_d24s8_s8 &&
3354bf215546Sopenharmony_ci       (a == gmem_a || blit_can_resolve(dst->format))) {
3355bf215546Sopenharmony_ci      if (store_common)
3356bf215546Sopenharmony_ci         tu_emit_blit(cmd, cs, iview, src, true, false);
3357bf215546Sopenharmony_ci      if (store_separate_stencil)
3358bf215546Sopenharmony_ci         tu_emit_blit(cmd, cs, iview, src, true, true);
3359bf215546Sopenharmony_ci
3360bf215546Sopenharmony_ci      if (cond_exec) {
3361bf215546Sopenharmony_ci         tu_end_load_store_cond_exec(cmd, cs, false);
3362bf215546Sopenharmony_ci      }
3363bf215546Sopenharmony_ci
3364bf215546Sopenharmony_ci      trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false);
3365bf215546Sopenharmony_ci      return;
3366bf215546Sopenharmony_ci   }
3367bf215546Sopenharmony_ci
3368bf215546Sopenharmony_ci   assert(cmd->state.gmem_layout == TU_GMEM_LAYOUT_AVOID_CCU);
3369bf215546Sopenharmony_ci
3370bf215546Sopenharmony_ci   enum pipe_format src_format = tu_vk_format_to_pipe_format(src->format);
3371bf215546Sopenharmony_ci   if (src_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
3372bf215546Sopenharmony_ci      src_format = PIPE_FORMAT_Z32_FLOAT;
3373bf215546Sopenharmony_ci
3374bf215546Sopenharmony_ci   enum pipe_format dst_format = tu_vk_format_to_pipe_format(dst->format);
3375bf215546Sopenharmony_ci   if (dst_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
3376bf215546Sopenharmony_ci      dst_format = PIPE_FORMAT_Z32_FLOAT;
3377bf215546Sopenharmony_ci
3378bf215546Sopenharmony_ci   if (dst->samples > 1) {
3379bf215546Sopenharmony_ci      /* If we hit this path, we have to disable draw states after every tile
3380bf215546Sopenharmony_ci       * instead of once at the end of the renderpass, so that they aren't
3381bf215546Sopenharmony_ci       * executed when calling CP_DRAW.
3382bf215546Sopenharmony_ci       *
3383bf215546Sopenharmony_ci       * TODO: store a flag somewhere so we don't do this more than once and
3384bf215546Sopenharmony_ci       * don't do it after the renderpass when this happens.
3385bf215546Sopenharmony_ci       */
3386bf215546Sopenharmony_ci      if (store_common || store_separate_stencil)
3387bf215546Sopenharmony_ci         tu_disable_draw_states(cmd, cs);
3388bf215546Sopenharmony_ci
3389bf215546Sopenharmony_ci      if (store_common) {
3390bf215546Sopenharmony_ci         store_3d_blit(cmd, cs, iview, dst->samples, false, src_format,
3391bf215546Sopenharmony_ci                       dst_format, render_area, tu_attachment_gmem_offset(cmd, src), src->cpp);
3392bf215546Sopenharmony_ci      }
3393bf215546Sopenharmony_ci      if (store_separate_stencil) {
3394bf215546Sopenharmony_ci         store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT,
3395bf215546Sopenharmony_ci                       PIPE_FORMAT_S8_UINT, render_area,
3396bf215546Sopenharmony_ci                       tu_attachment_gmem_offset_stencil(cmd, src), src->samples);
3397bf215546Sopenharmony_ci      }
3398bf215546Sopenharmony_ci   } else {
3399bf215546Sopenharmony_ci      r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);
3400bf215546Sopenharmony_ci
3401bf215546Sopenharmony_ci      if (store_common) {
3402bf215546Sopenharmony_ci         store_cp_blit(cmd, cs, iview, src->samples, false, src_format,
3403bf215546Sopenharmony_ci                       dst_format, tu_attachment_gmem_offset(cmd, src), src->cpp);
3404bf215546Sopenharmony_ci      }
3405bf215546Sopenharmony_ci      if (store_separate_stencil) {
3406bf215546Sopenharmony_ci         store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT,
3407bf215546Sopenharmony_ci                       PIPE_FORMAT_S8_UINT, tu_attachment_gmem_offset_stencil(cmd, src), src->samples);
3408bf215546Sopenharmony_ci      }
3409bf215546Sopenharmony_ci   }
3410bf215546Sopenharmony_ci
3411bf215546Sopenharmony_ci   if (cond_exec) {
3412bf215546Sopenharmony_ci      tu_end_load_store_cond_exec(cmd, cs, false);
3413bf215546Sopenharmony_ci   }
3414bf215546Sopenharmony_ci
3415bf215546Sopenharmony_ci   trace_end_gmem_store(&cmd->trace, cs, dst->format, false, unaligned);
3416bf215546Sopenharmony_ci}
3417