1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2021 Raspberry Pi Ltd
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "v3dv_private.h"
25bf215546Sopenharmony_ci#include "v3dv_meta_common.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci#include "broadcom/common/v3d_macros.h"
28bf215546Sopenharmony_ci#include "broadcom/common/v3d_tfu.h"
29bf215546Sopenharmony_ci#include "broadcom/cle/v3dx_pack.h"
30bf215546Sopenharmony_ci#include "broadcom/compiler/v3d_compiler.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_cistruct rcl_clear_info {
33bf215546Sopenharmony_ci   const union v3dv_clear_value *clear_value;
34bf215546Sopenharmony_ci   struct v3dv_image *image;
35bf215546Sopenharmony_ci   VkImageAspectFlags aspects;
36bf215546Sopenharmony_ci   uint32_t level;
37bf215546Sopenharmony_ci};
38bf215546Sopenharmony_ci
39bf215546Sopenharmony_cistatic struct v3dv_cl *
40bf215546Sopenharmony_ciemit_rcl_prologue(struct v3dv_job *job,
41bf215546Sopenharmony_ci                  struct v3dv_meta_framebuffer *fb,
42bf215546Sopenharmony_ci                  const struct rcl_clear_info *clear_info)
43bf215546Sopenharmony_ci{
44bf215546Sopenharmony_ci   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci   struct v3dv_cl *rcl = &job->rcl;
47bf215546Sopenharmony_ci   v3dv_cl_ensure_space_with_branch(rcl, 200 +
48bf215546Sopenharmony_ci                                    tiling->layers * 256 *
49bf215546Sopenharmony_ci                                    cl_packet_length(SUPERTILE_COORDINATES));
50bf215546Sopenharmony_ci   if (job->cmd_buffer->state.oom)
51bf215546Sopenharmony_ci      return NULL;
52bf215546Sopenharmony_ci
53bf215546Sopenharmony_ci   assert(!tiling->msaa || !tiling->double_buffer);
54bf215546Sopenharmony_ci   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
55bf215546Sopenharmony_ci      config.early_z_disable = true;
56bf215546Sopenharmony_ci      config.image_width_pixels = tiling->width;
57bf215546Sopenharmony_ci      config.image_height_pixels = tiling->height;
58bf215546Sopenharmony_ci      config.number_of_render_targets = 1;
59bf215546Sopenharmony_ci      config.multisample_mode_4x = tiling->msaa;
60bf215546Sopenharmony_ci      config.double_buffer_in_non_ms_mode = tiling->double_buffer;
61bf215546Sopenharmony_ci      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
62bf215546Sopenharmony_ci      config.internal_depth_type = fb->internal_depth_type;
63bf215546Sopenharmony_ci   }
64bf215546Sopenharmony_ci
65bf215546Sopenharmony_ci   if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
66bf215546Sopenharmony_ci      uint32_t clear_pad = 0;
67bf215546Sopenharmony_ci      if (clear_info->image) {
68bf215546Sopenharmony_ci         const struct v3dv_image *image = clear_info->image;
69bf215546Sopenharmony_ci         const struct v3d_resource_slice *slice =
70bf215546Sopenharmony_ci            &image->slices[clear_info->level];
71bf215546Sopenharmony_ci         if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
72bf215546Sopenharmony_ci             slice->tiling == V3D_TILING_UIF_XOR) {
73bf215546Sopenharmony_ci            int uif_block_height = v3d_utile_height(image->cpp) * 2;
74bf215546Sopenharmony_ci
75bf215546Sopenharmony_ci            uint32_t implicit_padded_height =
76bf215546Sopenharmony_ci               align(tiling->height, uif_block_height) / uif_block_height;
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci            if (slice->padded_height_of_output_image_in_uif_blocks -
79bf215546Sopenharmony_ci                implicit_padded_height >= 15) {
80bf215546Sopenharmony_ci               clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
81bf215546Sopenharmony_ci            }
82bf215546Sopenharmony_ci         }
83bf215546Sopenharmony_ci      }
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci      const uint32_t *color = &clear_info->clear_value->color[0];
86bf215546Sopenharmony_ci      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
87bf215546Sopenharmony_ci         clear.clear_color_low_32_bits = color[0];
88bf215546Sopenharmony_ci         clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
89bf215546Sopenharmony_ci         clear.render_target_number = 0;
90bf215546Sopenharmony_ci      };
91bf215546Sopenharmony_ci
92bf215546Sopenharmony_ci      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
93bf215546Sopenharmony_ci         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
94bf215546Sopenharmony_ci            clear.clear_color_mid_low_32_bits =
95bf215546Sopenharmony_ci              ((color[1] >> 24) | (color[2] << 8));
96bf215546Sopenharmony_ci            clear.clear_color_mid_high_24_bits =
97bf215546Sopenharmony_ci              ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
98bf215546Sopenharmony_ci            clear.render_target_number = 0;
99bf215546Sopenharmony_ci         };
100bf215546Sopenharmony_ci      }
101bf215546Sopenharmony_ci
102bf215546Sopenharmony_ci      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
103bf215546Sopenharmony_ci         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
104bf215546Sopenharmony_ci            clear.uif_padded_height_in_uif_blocks = clear_pad;
105bf215546Sopenharmony_ci            clear.clear_color_high_16_bits = color[3] >> 16;
106bf215546Sopenharmony_ci            clear.render_target_number = 0;
107bf215546Sopenharmony_ci         };
108bf215546Sopenharmony_ci      }
109bf215546Sopenharmony_ci   }
110bf215546Sopenharmony_ci
111bf215546Sopenharmony_ci   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
112bf215546Sopenharmony_ci      rt.render_target_0_internal_bpp = tiling->internal_bpp;
113bf215546Sopenharmony_ci      rt.render_target_0_internal_type = fb->internal_type;
114bf215546Sopenharmony_ci      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
115bf215546Sopenharmony_ci   }
116bf215546Sopenharmony_ci
117bf215546Sopenharmony_ci   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
118bf215546Sopenharmony_ci      clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
119bf215546Sopenharmony_ci      clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
120bf215546Sopenharmony_ci   };
121bf215546Sopenharmony_ci
122bf215546Sopenharmony_ci   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
123bf215546Sopenharmony_ci      init.use_auto_chained_tile_lists = true;
124bf215546Sopenharmony_ci      init.size_of_first_block_in_chained_tile_lists =
125bf215546Sopenharmony_ci         TILE_ALLOCATION_BLOCK_SIZE_64B;
126bf215546Sopenharmony_ci   }
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci   return rcl;
129bf215546Sopenharmony_ci}
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_cistatic void
132bf215546Sopenharmony_ciemit_frame_setup(struct v3dv_job *job,
133bf215546Sopenharmony_ci                 uint32_t min_layer,
134bf215546Sopenharmony_ci                 const union v3dv_clear_value *clear_value)
135bf215546Sopenharmony_ci{
136bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
137bf215546Sopenharmony_ci
138bf215546Sopenharmony_ci   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci   struct v3dv_cl *rcl = &job->rcl;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   const uint32_t tile_alloc_offset =
143bf215546Sopenharmony_ci      64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
144bf215546Sopenharmony_ci   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
145bf215546Sopenharmony_ci      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
146bf215546Sopenharmony_ci   }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
149bf215546Sopenharmony_ci      config.number_of_bin_tile_lists = 1;
150bf215546Sopenharmony_ci      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
151bf215546Sopenharmony_ci      config.total_frame_height_in_tiles = tiling->draw_tiles_y;
152bf215546Sopenharmony_ci
153bf215546Sopenharmony_ci      config.supertile_width_in_tiles = tiling->supertile_width;
154bf215546Sopenharmony_ci      config.supertile_height_in_tiles = tiling->supertile_height;
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci      config.total_frame_width_in_supertiles =
157bf215546Sopenharmony_ci         tiling->frame_width_in_supertiles;
158bf215546Sopenharmony_ci      config.total_frame_height_in_supertiles =
159bf215546Sopenharmony_ci         tiling->frame_height_in_supertiles;
160bf215546Sopenharmony_ci   }
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci   /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
163bf215546Sopenharmony_ci    * it here.
164bf215546Sopenharmony_ci    */
165bf215546Sopenharmony_ci   for (int i = 0; i < 2; i++) {
166bf215546Sopenharmony_ci      cl_emit(rcl, TILE_COORDINATES, coords);
167bf215546Sopenharmony_ci      cl_emit(rcl, END_OF_LOADS, end);
168bf215546Sopenharmony_ci      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
169bf215546Sopenharmony_ci         store.buffer_to_store = NONE;
170bf215546Sopenharmony_ci      }
171bf215546Sopenharmony_ci      /* When using double-buffering, we need to clear both buffers (unless
172bf215546Sopenharmony_ci       * we only have a single tile to render).
173bf215546Sopenharmony_ci       */
174bf215546Sopenharmony_ci      if (clear_value &&
175bf215546Sopenharmony_ci          (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
176bf215546Sopenharmony_ci         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
177bf215546Sopenharmony_ci            clear.clear_z_stencil_buffer = true;
178bf215546Sopenharmony_ci            clear.clear_all_render_targets = true;
179bf215546Sopenharmony_ci         }
180bf215546Sopenharmony_ci      }
181bf215546Sopenharmony_ci      cl_emit(rcl, END_OF_TILE_MARKER, end);
182bf215546Sopenharmony_ci   }
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci   cl_emit(rcl, FLUSH_VCD_CACHE, flush);
185bf215546Sopenharmony_ci}
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_cistatic void
188bf215546Sopenharmony_ciemit_supertile_coordinates(struct v3dv_job *job,
189bf215546Sopenharmony_ci                           struct v3dv_meta_framebuffer *framebuffer)
190bf215546Sopenharmony_ci{
191bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci   struct v3dv_cl *rcl = &job->rcl;
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_ci   const uint32_t min_y = framebuffer->min_y_supertile;
196bf215546Sopenharmony_ci   const uint32_t max_y = framebuffer->max_y_supertile;
197bf215546Sopenharmony_ci   const uint32_t min_x = framebuffer->min_x_supertile;
198bf215546Sopenharmony_ci   const uint32_t max_x = framebuffer->max_x_supertile;
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci   for (int y = min_y; y <= max_y; y++) {
201bf215546Sopenharmony_ci      for (int x = min_x; x <= max_x; x++) {
202bf215546Sopenharmony_ci         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
203bf215546Sopenharmony_ci            coords.column_number_in_supertiles = x;
204bf215546Sopenharmony_ci            coords.row_number_in_supertiles = y;
205bf215546Sopenharmony_ci         }
206bf215546Sopenharmony_ci      }
207bf215546Sopenharmony_ci   }
208bf215546Sopenharmony_ci}
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_cistatic void
211bf215546Sopenharmony_ciemit_linear_load(struct v3dv_cl *cl,
212bf215546Sopenharmony_ci                 uint32_t buffer,
213bf215546Sopenharmony_ci                 struct v3dv_bo *bo,
214bf215546Sopenharmony_ci                 uint32_t offset,
215bf215546Sopenharmony_ci                 uint32_t stride,
216bf215546Sopenharmony_ci                 uint32_t format)
217bf215546Sopenharmony_ci{
218bf215546Sopenharmony_ci   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
219bf215546Sopenharmony_ci      load.buffer_to_load = buffer;
220bf215546Sopenharmony_ci      load.address = v3dv_cl_address(bo, offset);
221bf215546Sopenharmony_ci      load.input_image_format = format;
222bf215546Sopenharmony_ci      load.memory_format = V3D_TILING_RASTER;
223bf215546Sopenharmony_ci      load.height_in_ub_or_stride = stride;
224bf215546Sopenharmony_ci      load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
225bf215546Sopenharmony_ci   }
226bf215546Sopenharmony_ci}
227bf215546Sopenharmony_ci
228bf215546Sopenharmony_cistatic void
229bf215546Sopenharmony_ciemit_linear_store(struct v3dv_cl *cl,
230bf215546Sopenharmony_ci                  uint32_t buffer,
231bf215546Sopenharmony_ci                  struct v3dv_bo *bo,
232bf215546Sopenharmony_ci                  uint32_t offset,
233bf215546Sopenharmony_ci                  uint32_t stride,
234bf215546Sopenharmony_ci                  bool msaa,
235bf215546Sopenharmony_ci                  uint32_t format)
236bf215546Sopenharmony_ci{
237bf215546Sopenharmony_ci   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
238bf215546Sopenharmony_ci      store.buffer_to_store = RENDER_TARGET_0;
239bf215546Sopenharmony_ci      store.address = v3dv_cl_address(bo, offset);
240bf215546Sopenharmony_ci      store.clear_buffer_being_stored = false;
241bf215546Sopenharmony_ci      store.output_image_format = format;
242bf215546Sopenharmony_ci      store.memory_format = V3D_TILING_RASTER;
243bf215546Sopenharmony_ci      store.height_in_ub_or_stride = stride;
244bf215546Sopenharmony_ci      store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
245bf215546Sopenharmony_ci                                   V3D_DECIMATE_MODE_SAMPLE_0;
246bf215546Sopenharmony_ci   }
247bf215546Sopenharmony_ci}
248bf215546Sopenharmony_ci
249bf215546Sopenharmony_ci/* This chooses a tile buffer format that is appropriate for the copy operation.
250bf215546Sopenharmony_ci * Typically, this is the image render target type, however, if we are copying
251bf215546Sopenharmony_ci * depth/stencil to/from a buffer the hardware can't do raster loads/stores, so
252bf215546Sopenharmony_ci * we need to load and store to/from a tile color buffer using a compatible
253bf215546Sopenharmony_ci * color format.
254bf215546Sopenharmony_ci */
255bf215546Sopenharmony_cistatic uint32_t
256bf215546Sopenharmony_cichoose_tlb_format(struct v3dv_meta_framebuffer *framebuffer,
257bf215546Sopenharmony_ci                  VkImageAspectFlags aspect,
258bf215546Sopenharmony_ci                  bool for_store,
259bf215546Sopenharmony_ci                  bool is_copy_to_buffer,
260bf215546Sopenharmony_ci                  bool is_copy_from_buffer)
261bf215546Sopenharmony_ci{
262bf215546Sopenharmony_ci   if (is_copy_to_buffer || is_copy_from_buffer) {
263bf215546Sopenharmony_ci      switch (framebuffer->vk_format) {
264bf215546Sopenharmony_ci      case VK_FORMAT_D16_UNORM:
265bf215546Sopenharmony_ci         return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
266bf215546Sopenharmony_ci      case VK_FORMAT_D32_SFLOAT:
267bf215546Sopenharmony_ci         return V3D_OUTPUT_IMAGE_FORMAT_R32F;
268bf215546Sopenharmony_ci      case VK_FORMAT_X8_D24_UNORM_PACK32:
269bf215546Sopenharmony_ci         return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
270bf215546Sopenharmony_ci      case VK_FORMAT_D24_UNORM_S8_UINT:
271bf215546Sopenharmony_ci         /* When storing the stencil aspect of a combined depth/stencil image
272bf215546Sopenharmony_ci          * to a buffer, the Vulkan spec states that the output buffer must
273bf215546Sopenharmony_ci          * have packed stencil values, so we choose an R8UI format for our
274bf215546Sopenharmony_ci          * store outputs. For the load input we still want RGBA8UI since the
275bf215546Sopenharmony_ci          * source image contains 4 channels (including the 3 channels
276bf215546Sopenharmony_ci          * containing the 24-bit depth value).
277bf215546Sopenharmony_ci          *
278bf215546Sopenharmony_ci          * When loading the stencil aspect of a combined depth/stencil image
279bf215546Sopenharmony_ci          * from a buffer, we read packed 8-bit stencil values from the buffer
280bf215546Sopenharmony_ci          * that we need to put into the LSB of the 32-bit format (the R
281bf215546Sopenharmony_ci          * channel), so we use R8UI. For the store, if we used R8UI then we
282bf215546Sopenharmony_ci          * would write 8-bit stencil values consecutively over depth channels,
283bf215546Sopenharmony_ci          * so we need to use RGBA8UI. This will write each stencil value in
284bf215546Sopenharmony_ci          * its correct position, but will overwrite depth values (channels G
285bf215546Sopenharmony_ci          * B,A) with undefined values. To fix this,  we will have to restore
286bf215546Sopenharmony_ci          * the depth aspect from the Z tile buffer, which we should pre-load
287bf215546Sopenharmony_ci          * from the image before the store).
288bf215546Sopenharmony_ci          */
289bf215546Sopenharmony_ci         if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
290bf215546Sopenharmony_ci            return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
291bf215546Sopenharmony_ci         } else {
292bf215546Sopenharmony_ci            assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
293bf215546Sopenharmony_ci            if (is_copy_to_buffer) {
294bf215546Sopenharmony_ci               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
295bf215546Sopenharmony_ci                                  V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
296bf215546Sopenharmony_ci            } else {
297bf215546Sopenharmony_ci               assert(is_copy_from_buffer);
298bf215546Sopenharmony_ci               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
299bf215546Sopenharmony_ci                                  V3D_OUTPUT_IMAGE_FORMAT_R8UI;
300bf215546Sopenharmony_ci            }
301bf215546Sopenharmony_ci         }
302bf215546Sopenharmony_ci      default: /* Color formats */
303bf215546Sopenharmony_ci         return framebuffer->format->rt_type;
304bf215546Sopenharmony_ci         break;
305bf215546Sopenharmony_ci      }
306bf215546Sopenharmony_ci   } else {
307bf215546Sopenharmony_ci      return framebuffer->format->rt_type;
308bf215546Sopenharmony_ci   }
309bf215546Sopenharmony_ci}
310bf215546Sopenharmony_ci
311bf215546Sopenharmony_cistatic inline bool
312bf215546Sopenharmony_ciformat_needs_rb_swap(struct v3dv_device *device,
313bf215546Sopenharmony_ci                     VkFormat format)
314bf215546Sopenharmony_ci{
315bf215546Sopenharmony_ci   const uint8_t *swizzle = v3dv_get_format_swizzle(device, format);
316bf215546Sopenharmony_ci   return v3dv_format_swizzle_needs_rb_swap(swizzle);
317bf215546Sopenharmony_ci}
318bf215546Sopenharmony_ci
319bf215546Sopenharmony_cistatic inline bool
320bf215546Sopenharmony_ciformat_needs_reverse(struct v3dv_device *device,
321bf215546Sopenharmony_ci                     VkFormat format)
322bf215546Sopenharmony_ci{
323bf215546Sopenharmony_ci   const uint8_t *swizzle = v3dv_get_format_swizzle(device, format);
324bf215546Sopenharmony_ci   return v3dv_format_swizzle_needs_reverse(swizzle);
325bf215546Sopenharmony_ci}
326bf215546Sopenharmony_ci
327bf215546Sopenharmony_cistatic void
328bf215546Sopenharmony_ciemit_image_load(struct v3dv_device *device,
329bf215546Sopenharmony_ci                struct v3dv_cl *cl,
330bf215546Sopenharmony_ci                struct v3dv_meta_framebuffer *framebuffer,
331bf215546Sopenharmony_ci                struct v3dv_image *image,
332bf215546Sopenharmony_ci                VkImageAspectFlags aspect,
333bf215546Sopenharmony_ci                uint32_t layer,
334bf215546Sopenharmony_ci                uint32_t mip_level,
335bf215546Sopenharmony_ci                bool is_copy_to_buffer,
336bf215546Sopenharmony_ci                bool is_copy_from_buffer)
337bf215546Sopenharmony_ci{
338bf215546Sopenharmony_ci   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
339bf215546Sopenharmony_ci
340bf215546Sopenharmony_ci   /* For image to/from buffer copies we always load to and store from RT0,
341bf215546Sopenharmony_ci    * even for depth/stencil aspects, because the hardware can't do raster
342bf215546Sopenharmony_ci    * stores or loads from/to the depth/stencil tile buffers.
343bf215546Sopenharmony_ci    */
344bf215546Sopenharmony_ci   bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
345bf215546Sopenharmony_ci                            aspect == VK_IMAGE_ASPECT_COLOR_BIT;
346bf215546Sopenharmony_ci
347bf215546Sopenharmony_ci   const struct v3d_resource_slice *slice = &image->slices[mip_level];
348bf215546Sopenharmony_ci   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
349bf215546Sopenharmony_ci      load.buffer_to_load = load_to_color_tlb ?
350bf215546Sopenharmony_ci         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_ci      load.address = v3dv_cl_address(image->mem->bo, layer_offset);
353bf215546Sopenharmony_ci
354bf215546Sopenharmony_ci      load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
355bf215546Sopenharmony_ci                                                  is_copy_to_buffer,
356bf215546Sopenharmony_ci                                                  is_copy_from_buffer);
357bf215546Sopenharmony_ci      load.memory_format = slice->tiling;
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci      /* When copying depth/stencil images to a buffer, for D24 formats Vulkan
360bf215546Sopenharmony_ci       * expects the depth value in the LSB bits of each 32-bit pixel.
361bf215546Sopenharmony_ci       * Unfortunately, the hardware seems to put the S8/X8 bits there and the
362bf215546Sopenharmony_ci       * depth bits on the MSB. To work around that we can reverse the channel
363bf215546Sopenharmony_ci       * order and then swap the R/B channels to get what we want.
364bf215546Sopenharmony_ci       *
365bf215546Sopenharmony_ci       * NOTE: reversing and swapping only gets us the behavior we want if the
366bf215546Sopenharmony_ci       * operations happen in that exact order, which seems to be the case when
367bf215546Sopenharmony_ci       * done on the tile buffer load operations. On the store, it seems the
368bf215546Sopenharmony_ci       * order is not the same. The order on the store is probably reversed so
369bf215546Sopenharmony_ci       * that reversing and swapping on both the load and the store preserves
370bf215546Sopenharmony_ci       * the original order of the channels in memory.
371bf215546Sopenharmony_ci       *
372bf215546Sopenharmony_ci       * Notice that we only need to do this when copying to a buffer, where
373bf215546Sopenharmony_ci       * depth and stencil aspects are copied as separate regions and
374bf215546Sopenharmony_ci       * the spec expects them to be tightly packed.
375bf215546Sopenharmony_ci       */
376bf215546Sopenharmony_ci      bool needs_rb_swap = false;
377bf215546Sopenharmony_ci      bool needs_chan_reverse = false;
378bf215546Sopenharmony_ci      if (is_copy_to_buffer &&
379bf215546Sopenharmony_ci         (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
380bf215546Sopenharmony_ci          (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
381bf215546Sopenharmony_ci           (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
382bf215546Sopenharmony_ci         needs_rb_swap = true;
383bf215546Sopenharmony_ci         needs_chan_reverse = true;
384bf215546Sopenharmony_ci      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
385bf215546Sopenharmony_ci                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
386bf215546Sopenharmony_ci         /* This is not a raw data copy (i.e. we are clearing the image),
387bf215546Sopenharmony_ci          * so we need to make sure we respect the format swizzle.
388bf215546Sopenharmony_ci          */
389bf215546Sopenharmony_ci         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
390bf215546Sopenharmony_ci         needs_chan_reverse = format_needs_reverse(device, framebuffer->vk_format);
391bf215546Sopenharmony_ci      }
392bf215546Sopenharmony_ci
393bf215546Sopenharmony_ci      load.r_b_swap = needs_rb_swap;
394bf215546Sopenharmony_ci      load.channel_reverse = needs_chan_reverse;
395bf215546Sopenharmony_ci
396bf215546Sopenharmony_ci      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
397bf215546Sopenharmony_ci          slice->tiling == V3D_TILING_UIF_XOR) {
398bf215546Sopenharmony_ci         load.height_in_ub_or_stride =
399bf215546Sopenharmony_ci            slice->padded_height_of_output_image_in_uif_blocks;
400bf215546Sopenharmony_ci      } else if (slice->tiling == V3D_TILING_RASTER) {
401bf215546Sopenharmony_ci         load.height_in_ub_or_stride = slice->stride;
402bf215546Sopenharmony_ci      }
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
405bf215546Sopenharmony_ci         load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
406bf215546Sopenharmony_ci      else
407bf215546Sopenharmony_ci         load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
408bf215546Sopenharmony_ci   }
409bf215546Sopenharmony_ci}
410bf215546Sopenharmony_ci
411bf215546Sopenharmony_cistatic void
412bf215546Sopenharmony_ciemit_image_store(struct v3dv_device *device,
413bf215546Sopenharmony_ci                 struct v3dv_cl *cl,
414bf215546Sopenharmony_ci                 struct v3dv_meta_framebuffer *framebuffer,
415bf215546Sopenharmony_ci                 struct v3dv_image *image,
416bf215546Sopenharmony_ci                 VkImageAspectFlags aspect,
417bf215546Sopenharmony_ci                 uint32_t layer,
418bf215546Sopenharmony_ci                 uint32_t mip_level,
419bf215546Sopenharmony_ci                 bool is_copy_to_buffer,
420bf215546Sopenharmony_ci                 bool is_copy_from_buffer,
421bf215546Sopenharmony_ci                 bool is_multisample_resolve)
422bf215546Sopenharmony_ci{
423bf215546Sopenharmony_ci   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
426bf215546Sopenharmony_ci                               aspect == VK_IMAGE_ASPECT_COLOR_BIT;
427bf215546Sopenharmony_ci
428bf215546Sopenharmony_ci   const struct v3d_resource_slice *slice = &image->slices[mip_level];
429bf215546Sopenharmony_ci   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
430bf215546Sopenharmony_ci      store.buffer_to_store = store_from_color_tlb ?
431bf215546Sopenharmony_ci         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);
432bf215546Sopenharmony_ci
433bf215546Sopenharmony_ci      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
434bf215546Sopenharmony_ci      store.clear_buffer_being_stored = false;
435bf215546Sopenharmony_ci
436bf215546Sopenharmony_ci      /* See rationale in emit_image_load() */
437bf215546Sopenharmony_ci      bool needs_rb_swap = false;
438bf215546Sopenharmony_ci      bool needs_chan_reverse = false;
439bf215546Sopenharmony_ci      if (is_copy_from_buffer &&
440bf215546Sopenharmony_ci         (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
441bf215546Sopenharmony_ci          (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
442bf215546Sopenharmony_ci           (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
443bf215546Sopenharmony_ci         needs_rb_swap = true;
444bf215546Sopenharmony_ci         needs_chan_reverse = true;
445bf215546Sopenharmony_ci      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
446bf215546Sopenharmony_ci                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
447bf215546Sopenharmony_ci         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
448bf215546Sopenharmony_ci         needs_chan_reverse = format_needs_reverse(device, framebuffer->vk_format);
449bf215546Sopenharmony_ci      }
450bf215546Sopenharmony_ci
451bf215546Sopenharmony_ci      store.r_b_swap = needs_rb_swap;
452bf215546Sopenharmony_ci      store.channel_reverse = needs_chan_reverse;
453bf215546Sopenharmony_ci
454bf215546Sopenharmony_ci      store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
455bf215546Sopenharmony_ci                                                    is_copy_to_buffer,
456bf215546Sopenharmony_ci                                                    is_copy_from_buffer);
457bf215546Sopenharmony_ci      store.memory_format = slice->tiling;
458bf215546Sopenharmony_ci      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
459bf215546Sopenharmony_ci          slice->tiling == V3D_TILING_UIF_XOR) {
460bf215546Sopenharmony_ci         store.height_in_ub_or_stride =
461bf215546Sopenharmony_ci            slice->padded_height_of_output_image_in_uif_blocks;
462bf215546Sopenharmony_ci      } else if (slice->tiling == V3D_TILING_RASTER) {
463bf215546Sopenharmony_ci         store.height_in_ub_or_stride = slice->stride;
464bf215546Sopenharmony_ci      }
465bf215546Sopenharmony_ci
466bf215546Sopenharmony_ci      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
467bf215546Sopenharmony_ci         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
468bf215546Sopenharmony_ci      else if (is_multisample_resolve)
469bf215546Sopenharmony_ci         store.decimate_mode = V3D_DECIMATE_MODE_4X;
470bf215546Sopenharmony_ci      else
471bf215546Sopenharmony_ci         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
472bf215546Sopenharmony_ci   }
473bf215546Sopenharmony_ci}
474bf215546Sopenharmony_ci
475bf215546Sopenharmony_cistatic void
476bf215546Sopenharmony_ciemit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
477bf215546Sopenharmony_ci                                        struct v3dv_meta_framebuffer *framebuffer,
478bf215546Sopenharmony_ci                                        struct v3dv_buffer *buffer,
479bf215546Sopenharmony_ci                                        struct v3dv_image *image,
480bf215546Sopenharmony_ci                                        uint32_t layer_offset,
481bf215546Sopenharmony_ci                                        const VkBufferImageCopy2 *region)
482bf215546Sopenharmony_ci{
483bf215546Sopenharmony_ci   struct v3dv_cl *cl = &job->indirect;
484bf215546Sopenharmony_ci   v3dv_cl_ensure_space(cl, 200, 1);
485bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
486bf215546Sopenharmony_ci
487bf215546Sopenharmony_ci   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_ci   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
490bf215546Sopenharmony_ci
491bf215546Sopenharmony_ci   /* Load image to TLB */
492bf215546Sopenharmony_ci   assert((image->vk.image_type != VK_IMAGE_TYPE_3D &&
493bf215546Sopenharmony_ci           layer_offset < region->imageSubresource.layerCount) ||
494bf215546Sopenharmony_ci          layer_offset < image->vk.extent.depth);
495bf215546Sopenharmony_ci
496bf215546Sopenharmony_ci   const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ?
497bf215546Sopenharmony_ci      region->imageSubresource.baseArrayLayer + layer_offset :
498bf215546Sopenharmony_ci      region->imageOffset.z + layer_offset;
499bf215546Sopenharmony_ci
500bf215546Sopenharmony_ci   emit_image_load(job->device, cl, framebuffer, image,
501bf215546Sopenharmony_ci                   region->imageSubresource.aspectMask,
502bf215546Sopenharmony_ci                   image_layer,
503bf215546Sopenharmony_ci                   region->imageSubresource.mipLevel,
504bf215546Sopenharmony_ci                   true, false);
505bf215546Sopenharmony_ci
506bf215546Sopenharmony_ci   cl_emit(cl, END_OF_LOADS, end);
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_ci   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
509bf215546Sopenharmony_ci
510bf215546Sopenharmony_ci   /* Store TLB to buffer */
511bf215546Sopenharmony_ci   uint32_t width, height;
512bf215546Sopenharmony_ci   if (region->bufferRowLength == 0)
513bf215546Sopenharmony_ci      width = region->imageExtent.width;
514bf215546Sopenharmony_ci   else
515bf215546Sopenharmony_ci      width = region->bufferRowLength;
516bf215546Sopenharmony_ci
517bf215546Sopenharmony_ci   if (region->bufferImageHeight == 0)
518bf215546Sopenharmony_ci      height = region->imageExtent.height;
519bf215546Sopenharmony_ci   else
520bf215546Sopenharmony_ci      height = region->bufferImageHeight;
521bf215546Sopenharmony_ci
522bf215546Sopenharmony_ci   /* Handle copy from compressed format */
523bf215546Sopenharmony_ci   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
524bf215546Sopenharmony_ci   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));
525bf215546Sopenharmony_ci
526bf215546Sopenharmony_ci   /* If we are storing stencil from a combined depth/stencil format the
527bf215546Sopenharmony_ci    * Vulkan spec states that the output buffer must have packed stencil
528bf215546Sopenharmony_ci    * values, where each stencil value is 1 byte.
529bf215546Sopenharmony_ci    */
530bf215546Sopenharmony_ci   uint32_t cpp =
531bf215546Sopenharmony_ci      region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
532bf215546Sopenharmony_ci         1 : image->cpp;
533bf215546Sopenharmony_ci   uint32_t buffer_stride = width * cpp;
534bf215546Sopenharmony_ci   uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
535bf215546Sopenharmony_ci                            height * buffer_stride * layer_offset;
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci   uint32_t format = choose_tlb_format(framebuffer,
538bf215546Sopenharmony_ci                                       region->imageSubresource.aspectMask,
539bf215546Sopenharmony_ci                                       true, true, false);
540bf215546Sopenharmony_ci   bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT;
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_ci   emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
543bf215546Sopenharmony_ci                     buffer_offset, buffer_stride, msaa, format);
544bf215546Sopenharmony_ci
545bf215546Sopenharmony_ci   cl_emit(cl, END_OF_TILE_MARKER, end);
546bf215546Sopenharmony_ci
547bf215546Sopenharmony_ci   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
550bf215546Sopenharmony_ci      branch.start = tile_list_start;
551bf215546Sopenharmony_ci      branch.end = v3dv_cl_get_address(cl);
552bf215546Sopenharmony_ci   }
553bf215546Sopenharmony_ci}
554bf215546Sopenharmony_ci
555bf215546Sopenharmony_cistatic void
556bf215546Sopenharmony_ciemit_copy_layer_to_buffer(struct v3dv_job *job,
557bf215546Sopenharmony_ci                          struct v3dv_buffer *buffer,
558bf215546Sopenharmony_ci                          struct v3dv_image *image,
559bf215546Sopenharmony_ci                          struct v3dv_meta_framebuffer *framebuffer,
560bf215546Sopenharmony_ci                          uint32_t layer,
561bf215546Sopenharmony_ci                          const VkBufferImageCopy2 *region)
562bf215546Sopenharmony_ci{
563bf215546Sopenharmony_ci   emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
564bf215546Sopenharmony_ci                                           image, layer, region);
565bf215546Sopenharmony_ci   emit_supertile_coordinates(job, framebuffer);
566bf215546Sopenharmony_ci}
567bf215546Sopenharmony_ci
568bf215546Sopenharmony_civoid
569bf215546Sopenharmony_civ3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
570bf215546Sopenharmony_ci                                         struct v3dv_buffer *buffer,
571bf215546Sopenharmony_ci                                         struct v3dv_image *image,
572bf215546Sopenharmony_ci                                         struct v3dv_meta_framebuffer *framebuffer,
573bf215546Sopenharmony_ci                                         const VkBufferImageCopy2 *region)
574bf215546Sopenharmony_ci{
575bf215546Sopenharmony_ci   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
576bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci   emit_frame_setup(job, 0, NULL);
579bf215546Sopenharmony_ci   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
580bf215546Sopenharmony_ci      emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
581bf215546Sopenharmony_ci   cl_emit(rcl, END_OF_RENDERING, end);
582bf215546Sopenharmony_ci}
583bf215546Sopenharmony_ci
584bf215546Sopenharmony_cistatic void
585bf215546Sopenharmony_ciemit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
586bf215546Sopenharmony_ci                                       struct v3dv_meta_framebuffer *framebuffer,
587bf215546Sopenharmony_ci                                       struct v3dv_image *dst,
588bf215546Sopenharmony_ci                                       struct v3dv_image *src,
589bf215546Sopenharmony_ci                                       uint32_t layer_offset,
590bf215546Sopenharmony_ci                                       const VkImageResolve2 *region)
591bf215546Sopenharmony_ci{
592bf215546Sopenharmony_ci   struct v3dv_cl *cl = &job->indirect;
593bf215546Sopenharmony_ci   v3dv_cl_ensure_space(cl, 200, 1);
594bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
595bf215546Sopenharmony_ci
596bf215546Sopenharmony_ci   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
597bf215546Sopenharmony_ci
598bf215546Sopenharmony_ci   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
601bf215546Sopenharmony_ci           layer_offset < region->srcSubresource.layerCount) ||
602bf215546Sopenharmony_ci          layer_offset < src->vk.extent.depth);
603bf215546Sopenharmony_ci
604bf215546Sopenharmony_ci   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
605bf215546Sopenharmony_ci      region->srcSubresource.baseArrayLayer + layer_offset :
606bf215546Sopenharmony_ci      region->srcOffset.z + layer_offset;
607bf215546Sopenharmony_ci
608bf215546Sopenharmony_ci   emit_image_load(job->device, cl, framebuffer, src,
609bf215546Sopenharmony_ci                   region->srcSubresource.aspectMask,
610bf215546Sopenharmony_ci                   src_layer,
611bf215546Sopenharmony_ci                   region->srcSubresource.mipLevel,
612bf215546Sopenharmony_ci                   false, false);
613bf215546Sopenharmony_ci
614bf215546Sopenharmony_ci   cl_emit(cl, END_OF_LOADS, end);
615bf215546Sopenharmony_ci
616bf215546Sopenharmony_ci   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
617bf215546Sopenharmony_ci
618bf215546Sopenharmony_ci   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
619bf215546Sopenharmony_ci           layer_offset < region->dstSubresource.layerCount) ||
620bf215546Sopenharmony_ci          layer_offset < dst->vk.extent.depth);
621bf215546Sopenharmony_ci
622bf215546Sopenharmony_ci   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
623bf215546Sopenharmony_ci      region->dstSubresource.baseArrayLayer + layer_offset :
624bf215546Sopenharmony_ci      region->dstOffset.z + layer_offset;
625bf215546Sopenharmony_ci
626bf215546Sopenharmony_ci   emit_image_store(job->device, cl, framebuffer, dst,
627bf215546Sopenharmony_ci                    region->dstSubresource.aspectMask,
628bf215546Sopenharmony_ci                    dst_layer,
629bf215546Sopenharmony_ci                    region->dstSubresource.mipLevel,
630bf215546Sopenharmony_ci                    false, false, true);
631bf215546Sopenharmony_ci
632bf215546Sopenharmony_ci   cl_emit(cl, END_OF_TILE_MARKER, end);
633bf215546Sopenharmony_ci
634bf215546Sopenharmony_ci   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
635bf215546Sopenharmony_ci
636bf215546Sopenharmony_ci   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
637bf215546Sopenharmony_ci      branch.start = tile_list_start;
638bf215546Sopenharmony_ci      branch.end = v3dv_cl_get_address(cl);
639bf215546Sopenharmony_ci   }
640bf215546Sopenharmony_ci}
641bf215546Sopenharmony_ci
642bf215546Sopenharmony_cistatic void
643bf215546Sopenharmony_ciemit_resolve_image_layer(struct v3dv_job *job,
644bf215546Sopenharmony_ci                         struct v3dv_image *dst,
645bf215546Sopenharmony_ci                         struct v3dv_image *src,
646bf215546Sopenharmony_ci                         struct v3dv_meta_framebuffer *framebuffer,
647bf215546Sopenharmony_ci                         uint32_t layer,
648bf215546Sopenharmony_ci                         const VkImageResolve2 *region)
649bf215546Sopenharmony_ci{
650bf215546Sopenharmony_ci   emit_resolve_image_layer_per_tile_list(job, framebuffer,
651bf215546Sopenharmony_ci                                          dst, src, layer, region);
652bf215546Sopenharmony_ci   emit_supertile_coordinates(job, framebuffer);
653bf215546Sopenharmony_ci}
654bf215546Sopenharmony_ci
655bf215546Sopenharmony_civoid
656bf215546Sopenharmony_civ3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job,
657bf215546Sopenharmony_ci                                  struct v3dv_image *dst,
658bf215546Sopenharmony_ci                                  struct v3dv_image *src,
659bf215546Sopenharmony_ci                                  struct v3dv_meta_framebuffer *framebuffer,
660bf215546Sopenharmony_ci                                  const VkImageResolve2 *region)
661bf215546Sopenharmony_ci{
662bf215546Sopenharmony_ci   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
663bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
664bf215546Sopenharmony_ci
665bf215546Sopenharmony_ci   emit_frame_setup(job, 0, NULL);
666bf215546Sopenharmony_ci   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
667bf215546Sopenharmony_ci      emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
668bf215546Sopenharmony_ci   cl_emit(rcl, END_OF_RENDERING, end);
669bf215546Sopenharmony_ci}
670bf215546Sopenharmony_ci
671bf215546Sopenharmony_cistatic void
672bf215546Sopenharmony_ciemit_copy_buffer_per_tile_list(struct v3dv_job *job,
673bf215546Sopenharmony_ci                               struct v3dv_bo *dst,
674bf215546Sopenharmony_ci                               struct v3dv_bo *src,
675bf215546Sopenharmony_ci                               uint32_t dst_offset,
676bf215546Sopenharmony_ci                               uint32_t src_offset,
677bf215546Sopenharmony_ci                               uint32_t stride,
678bf215546Sopenharmony_ci                               uint32_t format)
679bf215546Sopenharmony_ci{
680bf215546Sopenharmony_ci   struct v3dv_cl *cl = &job->indirect;
681bf215546Sopenharmony_ci   v3dv_cl_ensure_space(cl, 200, 1);
682bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
683bf215546Sopenharmony_ci
684bf215546Sopenharmony_ci   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
685bf215546Sopenharmony_ci
686bf215546Sopenharmony_ci   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
687bf215546Sopenharmony_ci
688bf215546Sopenharmony_ci   emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci   cl_emit(cl, END_OF_LOADS, end);
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_ci   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
693bf215546Sopenharmony_ci
694bf215546Sopenharmony_ci   emit_linear_store(cl, RENDER_TARGET_0,
695bf215546Sopenharmony_ci                     dst, dst_offset, stride, false, format);
696bf215546Sopenharmony_ci
697bf215546Sopenharmony_ci   cl_emit(cl, END_OF_TILE_MARKER, end);
698bf215546Sopenharmony_ci
699bf215546Sopenharmony_ci   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
700bf215546Sopenharmony_ci
701bf215546Sopenharmony_ci   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
702bf215546Sopenharmony_ci      branch.start = tile_list_start;
703bf215546Sopenharmony_ci      branch.end = v3dv_cl_get_address(cl);
704bf215546Sopenharmony_ci   }
705bf215546Sopenharmony_ci}
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_civoid
708bf215546Sopenharmony_civ3dX(meta_emit_copy_buffer)(struct v3dv_job *job,
709bf215546Sopenharmony_ci                            struct v3dv_bo *dst,
710bf215546Sopenharmony_ci                            struct v3dv_bo *src,
711bf215546Sopenharmony_ci                            uint32_t dst_offset,
712bf215546Sopenharmony_ci                            uint32_t src_offset,
713bf215546Sopenharmony_ci                            struct v3dv_meta_framebuffer *framebuffer,
714bf215546Sopenharmony_ci                            uint32_t format,
715bf215546Sopenharmony_ci                            uint32_t item_size)
716bf215546Sopenharmony_ci{
717bf215546Sopenharmony_ci   const uint32_t stride = job->frame_tiling.width * item_size;
718bf215546Sopenharmony_ci   emit_copy_buffer_per_tile_list(job, dst, src,
719bf215546Sopenharmony_ci                                  dst_offset, src_offset,
720bf215546Sopenharmony_ci                                  stride, format);
721bf215546Sopenharmony_ci   emit_supertile_coordinates(job, framebuffer);
722bf215546Sopenharmony_ci}
723bf215546Sopenharmony_ci
724bf215546Sopenharmony_civoid
725bf215546Sopenharmony_civ3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job,
726bf215546Sopenharmony_ci                                struct v3dv_bo *dst,
727bf215546Sopenharmony_ci                                struct v3dv_bo *src,
728bf215546Sopenharmony_ci                                uint32_t dst_offset,
729bf215546Sopenharmony_ci                                uint32_t src_offset,
730bf215546Sopenharmony_ci                                struct v3dv_meta_framebuffer *framebuffer,
731bf215546Sopenharmony_ci                                uint32_t format,
732bf215546Sopenharmony_ci                                uint32_t item_size)
733bf215546Sopenharmony_ci{
734bf215546Sopenharmony_ci   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
735bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_ci   emit_frame_setup(job, 0, NULL);
738bf215546Sopenharmony_ci
739bf215546Sopenharmony_ci   v3dX(meta_emit_copy_buffer)(job, dst, src, dst_offset, src_offset,
740bf215546Sopenharmony_ci                               framebuffer, format, item_size);
741bf215546Sopenharmony_ci
742bf215546Sopenharmony_ci   cl_emit(rcl, END_OF_RENDERING, end);
743bf215546Sopenharmony_ci}
744bf215546Sopenharmony_ci
745bf215546Sopenharmony_cistatic void
746bf215546Sopenharmony_ciemit_copy_image_layer_per_tile_list(struct v3dv_job *job,
747bf215546Sopenharmony_ci                                    struct v3dv_meta_framebuffer *framebuffer,
748bf215546Sopenharmony_ci                                    struct v3dv_image *dst,
749bf215546Sopenharmony_ci                                    struct v3dv_image *src,
750bf215546Sopenharmony_ci                                    uint32_t layer_offset,
751bf215546Sopenharmony_ci                                    const VkImageCopy2 *region)
752bf215546Sopenharmony_ci{
753bf215546Sopenharmony_ci   struct v3dv_cl *cl = &job->indirect;
754bf215546Sopenharmony_ci   v3dv_cl_ensure_space(cl, 200, 1);
755bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
756bf215546Sopenharmony_ci
757bf215546Sopenharmony_ci   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
758bf215546Sopenharmony_ci
759bf215546Sopenharmony_ci   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
760bf215546Sopenharmony_ci
761bf215546Sopenharmony_ci   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
762bf215546Sopenharmony_ci           layer_offset < region->srcSubresource.layerCount) ||
763bf215546Sopenharmony_ci          layer_offset < src->vk.extent.depth);
764bf215546Sopenharmony_ci
765bf215546Sopenharmony_ci   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
766bf215546Sopenharmony_ci      region->srcSubresource.baseArrayLayer + layer_offset :
767bf215546Sopenharmony_ci      region->srcOffset.z + layer_offset;
768bf215546Sopenharmony_ci
769bf215546Sopenharmony_ci   emit_image_load(job->device, cl, framebuffer, src,
770bf215546Sopenharmony_ci                   region->srcSubresource.aspectMask,
771bf215546Sopenharmony_ci                   src_layer,
772bf215546Sopenharmony_ci                   region->srcSubresource.mipLevel,
773bf215546Sopenharmony_ci                   false, false);
774bf215546Sopenharmony_ci
775bf215546Sopenharmony_ci   cl_emit(cl, END_OF_LOADS, end);
776bf215546Sopenharmony_ci
777bf215546Sopenharmony_ci   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
778bf215546Sopenharmony_ci
779bf215546Sopenharmony_ci   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
780bf215546Sopenharmony_ci           layer_offset < region->dstSubresource.layerCount) ||
781bf215546Sopenharmony_ci          layer_offset < dst->vk.extent.depth);
782bf215546Sopenharmony_ci
783bf215546Sopenharmony_ci   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
784bf215546Sopenharmony_ci      region->dstSubresource.baseArrayLayer + layer_offset :
785bf215546Sopenharmony_ci      region->dstOffset.z + layer_offset;
786bf215546Sopenharmony_ci
787bf215546Sopenharmony_ci   emit_image_store(job->device, cl, framebuffer, dst,
788bf215546Sopenharmony_ci                    region->dstSubresource.aspectMask,
789bf215546Sopenharmony_ci                    dst_layer,
790bf215546Sopenharmony_ci                    region->dstSubresource.mipLevel,
791bf215546Sopenharmony_ci                    false, false, false);
792bf215546Sopenharmony_ci
793bf215546Sopenharmony_ci   cl_emit(cl, END_OF_TILE_MARKER, end);
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_ci   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
796bf215546Sopenharmony_ci
797bf215546Sopenharmony_ci   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
798bf215546Sopenharmony_ci      branch.start = tile_list_start;
799bf215546Sopenharmony_ci      branch.end = v3dv_cl_get_address(cl);
800bf215546Sopenharmony_ci   }
801bf215546Sopenharmony_ci}
802bf215546Sopenharmony_ci
803bf215546Sopenharmony_cistatic void
804bf215546Sopenharmony_ciemit_copy_image_layer(struct v3dv_job *job,
805bf215546Sopenharmony_ci                      struct v3dv_image *dst,
806bf215546Sopenharmony_ci                      struct v3dv_image *src,
807bf215546Sopenharmony_ci                      struct v3dv_meta_framebuffer *framebuffer,
808bf215546Sopenharmony_ci                      uint32_t layer,
809bf215546Sopenharmony_ci                      const VkImageCopy2 *region)
810bf215546Sopenharmony_ci{
811bf215546Sopenharmony_ci   emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
812bf215546Sopenharmony_ci   emit_supertile_coordinates(job, framebuffer);
813bf215546Sopenharmony_ci}
814bf215546Sopenharmony_ci
815bf215546Sopenharmony_civoid
816bf215546Sopenharmony_civ3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job,
817bf215546Sopenharmony_ci                               struct v3dv_image *dst,
818bf215546Sopenharmony_ci                               struct v3dv_image *src,
819bf215546Sopenharmony_ci                               struct v3dv_meta_framebuffer *framebuffer,
820bf215546Sopenharmony_ci                               const VkImageCopy2 *region)
821bf215546Sopenharmony_ci{
822bf215546Sopenharmony_ci   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
823bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
824bf215546Sopenharmony_ci
825bf215546Sopenharmony_ci   emit_frame_setup(job, 0, NULL);
826bf215546Sopenharmony_ci   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
827bf215546Sopenharmony_ci      emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
828bf215546Sopenharmony_ci   cl_emit(rcl, END_OF_RENDERING, end);
829bf215546Sopenharmony_ci}
830bf215546Sopenharmony_ci
831bf215546Sopenharmony_civoid
832bf215546Sopenharmony_civ3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
833bf215546Sopenharmony_ci                        uint32_t dst_bo_handle,
834bf215546Sopenharmony_ci                        uint32_t dst_offset,
835bf215546Sopenharmony_ci                        enum v3d_tiling_mode dst_tiling,
836bf215546Sopenharmony_ci                        uint32_t dst_padded_height_or_stride,
837bf215546Sopenharmony_ci                        uint32_t dst_cpp,
838bf215546Sopenharmony_ci                        uint32_t src_bo_handle,
839bf215546Sopenharmony_ci                        uint32_t src_offset,
840bf215546Sopenharmony_ci                        enum v3d_tiling_mode src_tiling,
841bf215546Sopenharmony_ci                        uint32_t src_padded_height_or_stride,
842bf215546Sopenharmony_ci                        uint32_t src_cpp,
843bf215546Sopenharmony_ci                        uint32_t width,
844bf215546Sopenharmony_ci                        uint32_t height,
845bf215546Sopenharmony_ci                        const struct v3dv_format *format)
846bf215546Sopenharmony_ci{
847bf215546Sopenharmony_ci   struct drm_v3d_submit_tfu tfu = {
848bf215546Sopenharmony_ci      .ios = (height << 16) | width,
849bf215546Sopenharmony_ci      .bo_handles = {
850bf215546Sopenharmony_ci         dst_bo_handle,
851bf215546Sopenharmony_ci         src_bo_handle != dst_bo_handle ? src_bo_handle : 0
852bf215546Sopenharmony_ci      },
853bf215546Sopenharmony_ci   };
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_ci   tfu.iia |= src_offset;
856bf215546Sopenharmony_ci
857bf215546Sopenharmony_ci   if (src_tiling == V3D_TILING_RASTER) {
858bf215546Sopenharmony_ci      tfu.icfg = V3D33_TFU_ICFG_FORMAT_RASTER << V3D33_TFU_ICFG_FORMAT_SHIFT;
859bf215546Sopenharmony_ci   } else {
860bf215546Sopenharmony_ci      tfu.icfg = (V3D33_TFU_ICFG_FORMAT_LINEARTILE +
861bf215546Sopenharmony_ci                  (src_tiling - V3D_TILING_LINEARTILE)) <<
862bf215546Sopenharmony_ci                   V3D33_TFU_ICFG_FORMAT_SHIFT;
863bf215546Sopenharmony_ci   }
864bf215546Sopenharmony_ci   tfu.icfg |= format->tex_type << V3D33_TFU_ICFG_TTYPE_SHIFT;
865bf215546Sopenharmony_ci
866bf215546Sopenharmony_ci   tfu.ioa = dst_offset;
867bf215546Sopenharmony_ci
868bf215546Sopenharmony_ci   tfu.ioa |= (V3D33_TFU_IOA_FORMAT_LINEARTILE +
869bf215546Sopenharmony_ci               (dst_tiling - V3D_TILING_LINEARTILE)) <<
870bf215546Sopenharmony_ci                V3D33_TFU_IOA_FORMAT_SHIFT;
871bf215546Sopenharmony_ci
872bf215546Sopenharmony_ci   switch (src_tiling) {
873bf215546Sopenharmony_ci   case V3D_TILING_UIF_NO_XOR:
874bf215546Sopenharmony_ci   case V3D_TILING_UIF_XOR:
875bf215546Sopenharmony_ci      tfu.iis |= src_padded_height_or_stride / (2 * v3d_utile_height(src_cpp));
876bf215546Sopenharmony_ci      break;
877bf215546Sopenharmony_ci   case V3D_TILING_RASTER:
878bf215546Sopenharmony_ci      tfu.iis |= src_padded_height_or_stride / src_cpp;
879bf215546Sopenharmony_ci      break;
880bf215546Sopenharmony_ci   default:
881bf215546Sopenharmony_ci      break;
882bf215546Sopenharmony_ci   }
883bf215546Sopenharmony_ci
884bf215546Sopenharmony_ci   /* The TFU can handle raster sources but always produces UIF results */
885bf215546Sopenharmony_ci   assert(dst_tiling != V3D_TILING_RASTER);
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci   /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
888bf215546Sopenharmony_ci    * OPAD field for the destination (how many extra UIF blocks beyond
889bf215546Sopenharmony_ci    * those necessary to cover the height).
890bf215546Sopenharmony_ci    */
891bf215546Sopenharmony_ci   if (dst_tiling == V3D_TILING_UIF_NO_XOR || dst_tiling == V3D_TILING_UIF_XOR) {
892bf215546Sopenharmony_ci      uint32_t uif_block_h = 2 * v3d_utile_height(dst_cpp);
893bf215546Sopenharmony_ci      uint32_t implicit_padded_height = align(height, uif_block_h);
894bf215546Sopenharmony_ci      uint32_t icfg = (dst_padded_height_or_stride - implicit_padded_height) /
895bf215546Sopenharmony_ci                      uif_block_h;
896bf215546Sopenharmony_ci      tfu.icfg |= icfg << V3D33_TFU_ICFG_OPAD_SHIFT;
897bf215546Sopenharmony_ci   }
898bf215546Sopenharmony_ci
899bf215546Sopenharmony_ci   v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
900bf215546Sopenharmony_ci}
901bf215546Sopenharmony_ci
902bf215546Sopenharmony_cistatic void
903bf215546Sopenharmony_ciemit_clear_image_layer_per_tile_list(struct v3dv_job *job,
904bf215546Sopenharmony_ci                                     struct v3dv_meta_framebuffer *framebuffer,
905bf215546Sopenharmony_ci                                     struct v3dv_image *image,
906bf215546Sopenharmony_ci                                     VkImageAspectFlags aspects,
907bf215546Sopenharmony_ci                                     uint32_t layer,
908bf215546Sopenharmony_ci                                     uint32_t level)
909bf215546Sopenharmony_ci{
910bf215546Sopenharmony_ci   struct v3dv_cl *cl = &job->indirect;
911bf215546Sopenharmony_ci   v3dv_cl_ensure_space(cl, 200, 1);
912bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
913bf215546Sopenharmony_ci
914bf215546Sopenharmony_ci   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci   cl_emit(cl, END_OF_LOADS, end);
919bf215546Sopenharmony_ci
920bf215546Sopenharmony_ci   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci   emit_image_store(job->device, cl, framebuffer, image, aspects,
923bf215546Sopenharmony_ci                    layer, level, false, false, false);
924bf215546Sopenharmony_ci
925bf215546Sopenharmony_ci   cl_emit(cl, END_OF_TILE_MARKER, end);
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_ci   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
928bf215546Sopenharmony_ci
929bf215546Sopenharmony_ci   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
930bf215546Sopenharmony_ci      branch.start = tile_list_start;
931bf215546Sopenharmony_ci      branch.end = v3dv_cl_get_address(cl);
932bf215546Sopenharmony_ci   }
933bf215546Sopenharmony_ci}
934bf215546Sopenharmony_ci
935bf215546Sopenharmony_cistatic void
936bf215546Sopenharmony_ciemit_clear_image_layers(struct v3dv_job *job,
937bf215546Sopenharmony_ci                 struct v3dv_image *image,
938bf215546Sopenharmony_ci                 struct v3dv_meta_framebuffer *framebuffer,
939bf215546Sopenharmony_ci                 VkImageAspectFlags aspects,
940bf215546Sopenharmony_ci                 uint32_t min_layer,
941bf215546Sopenharmony_ci                 uint32_t max_layer,
942bf215546Sopenharmony_ci                 uint32_t level)
943bf215546Sopenharmony_ci{
944bf215546Sopenharmony_ci   for (uint32_t layer = min_layer; layer < max_layer; layer++) {
945bf215546Sopenharmony_ci      emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects,
946bf215546Sopenharmony_ci                                           layer, level);
947bf215546Sopenharmony_ci      emit_supertile_coordinates(job, framebuffer);
948bf215546Sopenharmony_ci   }
949bf215546Sopenharmony_ci}
950bf215546Sopenharmony_ci
951bf215546Sopenharmony_civoid
952bf215546Sopenharmony_civ3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job,
953bf215546Sopenharmony_ci                                struct v3dv_image *image,
954bf215546Sopenharmony_ci                                struct v3dv_meta_framebuffer *framebuffer,
955bf215546Sopenharmony_ci                                const union v3dv_clear_value *clear_value,
956bf215546Sopenharmony_ci                                VkImageAspectFlags aspects,
957bf215546Sopenharmony_ci                                uint32_t min_layer,
958bf215546Sopenharmony_ci                                uint32_t max_layer,
959bf215546Sopenharmony_ci                                uint32_t level)
960bf215546Sopenharmony_ci{
961bf215546Sopenharmony_ci   const struct rcl_clear_info clear_info = {
962bf215546Sopenharmony_ci      .clear_value = clear_value,
963bf215546Sopenharmony_ci      .image = image,
964bf215546Sopenharmony_ci      .aspects = aspects,
965bf215546Sopenharmony_ci      .level = level,
966bf215546Sopenharmony_ci   };
967bf215546Sopenharmony_ci
968bf215546Sopenharmony_ci   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
969bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
970bf215546Sopenharmony_ci
971bf215546Sopenharmony_ci   emit_frame_setup(job, 0, clear_value);
972bf215546Sopenharmony_ci   emit_clear_image_layers(job, image, framebuffer, aspects,
973bf215546Sopenharmony_ci                           min_layer, max_layer, level);
974bf215546Sopenharmony_ci   cl_emit(rcl, END_OF_RENDERING, end);
975bf215546Sopenharmony_ci}
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_cistatic void
978bf215546Sopenharmony_ciemit_fill_buffer_per_tile_list(struct v3dv_job *job,
979bf215546Sopenharmony_ci                               struct v3dv_bo *bo,
980bf215546Sopenharmony_ci                               uint32_t offset,
981bf215546Sopenharmony_ci                               uint32_t stride)
982bf215546Sopenharmony_ci{
983bf215546Sopenharmony_ci   struct v3dv_cl *cl = &job->indirect;
984bf215546Sopenharmony_ci   v3dv_cl_ensure_space(cl, 200, 1);
985bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
986bf215546Sopenharmony_ci
987bf215546Sopenharmony_ci   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
988bf215546Sopenharmony_ci
989bf215546Sopenharmony_ci   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
990bf215546Sopenharmony_ci
991bf215546Sopenharmony_ci   cl_emit(cl, END_OF_LOADS, end);
992bf215546Sopenharmony_ci
993bf215546Sopenharmony_ci   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_ci   emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
996bf215546Sopenharmony_ci                     V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);
997bf215546Sopenharmony_ci
998bf215546Sopenharmony_ci   cl_emit(cl, END_OF_TILE_MARKER, end);
999bf215546Sopenharmony_ci
1000bf215546Sopenharmony_ci   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
1001bf215546Sopenharmony_ci
1002bf215546Sopenharmony_ci   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
1003bf215546Sopenharmony_ci      branch.start = tile_list_start;
1004bf215546Sopenharmony_ci      branch.end = v3dv_cl_get_address(cl);
1005bf215546Sopenharmony_ci   }
1006bf215546Sopenharmony_ci}
1007bf215546Sopenharmony_ci
1008bf215546Sopenharmony_cistatic void
1009bf215546Sopenharmony_ciemit_fill_buffer(struct v3dv_job *job,
1010bf215546Sopenharmony_ci                 struct v3dv_bo *bo,
1011bf215546Sopenharmony_ci                 uint32_t offset,
1012bf215546Sopenharmony_ci                 struct v3dv_meta_framebuffer *framebuffer)
1013bf215546Sopenharmony_ci{
1014bf215546Sopenharmony_ci   const uint32_t stride = job->frame_tiling.width * 4;
1015bf215546Sopenharmony_ci   emit_fill_buffer_per_tile_list(job, bo, offset, stride);
1016bf215546Sopenharmony_ci   emit_supertile_coordinates(job, framebuffer);
1017bf215546Sopenharmony_ci}
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_civoid
1020bf215546Sopenharmony_civ3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job,
1021bf215546Sopenharmony_ci                                struct v3dv_bo *bo,
1022bf215546Sopenharmony_ci                                uint32_t offset,
1023bf215546Sopenharmony_ci                                struct v3dv_meta_framebuffer *framebuffer,
1024bf215546Sopenharmony_ci                                uint32_t data)
1025bf215546Sopenharmony_ci{
1026bf215546Sopenharmony_ci   const union v3dv_clear_value clear_value = {
1027bf215546Sopenharmony_ci       .color = { data, 0, 0, 0 },
1028bf215546Sopenharmony_ci   };
1029bf215546Sopenharmony_ci
1030bf215546Sopenharmony_ci   const struct rcl_clear_info clear_info = {
1031bf215546Sopenharmony_ci      .clear_value = &clear_value,
1032bf215546Sopenharmony_ci      .image = NULL,
1033bf215546Sopenharmony_ci      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
1034bf215546Sopenharmony_ci      .level = 0,
1035bf215546Sopenharmony_ci   };
1036bf215546Sopenharmony_ci
1037bf215546Sopenharmony_ci   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
1038bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
1039bf215546Sopenharmony_ci
1040bf215546Sopenharmony_ci   emit_frame_setup(job, 0, &clear_value);
1041bf215546Sopenharmony_ci   emit_fill_buffer(job, bo, offset, framebuffer);
1042bf215546Sopenharmony_ci   cl_emit(rcl, END_OF_RENDERING, end);
1043bf215546Sopenharmony_ci}
1044bf215546Sopenharmony_ci
1045bf215546Sopenharmony_ci
1046bf215546Sopenharmony_cistatic void
1047bf215546Sopenharmony_ciemit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
1048bf215546Sopenharmony_ci                                        struct v3dv_meta_framebuffer *framebuffer,
1049bf215546Sopenharmony_ci                                        struct v3dv_image *image,
1050bf215546Sopenharmony_ci                                        struct v3dv_buffer *buffer,
1051bf215546Sopenharmony_ci                                        uint32_t layer,
1052bf215546Sopenharmony_ci                                        const VkBufferImageCopy2 *region)
1053bf215546Sopenharmony_ci{
1054bf215546Sopenharmony_ci   struct v3dv_cl *cl = &job->indirect;
1055bf215546Sopenharmony_ci   v3dv_cl_ensure_space(cl, 200, 1);
1056bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
1057bf215546Sopenharmony_ci
1058bf215546Sopenharmony_ci   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);
1059bf215546Sopenharmony_ci
1060bf215546Sopenharmony_ci   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);
1061bf215546Sopenharmony_ci
1062bf215546Sopenharmony_ci   const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
1063bf215546Sopenharmony_ci   assert((image->vk.image_type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
1064bf215546Sopenharmony_ci          layer < image->vk.extent.depth);
1065bf215546Sopenharmony_ci
1066bf215546Sopenharmony_ci   /* Load TLB from buffer */
1067bf215546Sopenharmony_ci   uint32_t width, height;
1068bf215546Sopenharmony_ci   if (region->bufferRowLength == 0)
1069bf215546Sopenharmony_ci      width = region->imageExtent.width;
1070bf215546Sopenharmony_ci   else
1071bf215546Sopenharmony_ci      width = region->bufferRowLength;
1072bf215546Sopenharmony_ci
1073bf215546Sopenharmony_ci   if (region->bufferImageHeight == 0)
1074bf215546Sopenharmony_ci      height = region->imageExtent.height;
1075bf215546Sopenharmony_ci   else
1076bf215546Sopenharmony_ci      height = region->bufferImageHeight;
1077bf215546Sopenharmony_ci
1078bf215546Sopenharmony_ci   /* Handle copy to compressed format using a compatible format */
1079bf215546Sopenharmony_ci   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
1080bf215546Sopenharmony_ci   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));
1081bf215546Sopenharmony_ci
1082bf215546Sopenharmony_ci   uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
1083bf215546Sopenharmony_ci                  1 : image->cpp;
1084bf215546Sopenharmony_ci   uint32_t buffer_stride = width * cpp;
1085bf215546Sopenharmony_ci   uint32_t buffer_offset =
1086bf215546Sopenharmony_ci      buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;
1087bf215546Sopenharmony_ci
1088bf215546Sopenharmony_ci   uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
1089bf215546Sopenharmony_ci                                       false, false, true);
1090bf215546Sopenharmony_ci
1091bf215546Sopenharmony_ci   uint32_t image_layer = layer + (image->vk.image_type != VK_IMAGE_TYPE_3D ?
1092bf215546Sopenharmony_ci      imgrsc->baseArrayLayer : region->imageOffset.z);
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci   emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
1095bf215546Sopenharmony_ci                    buffer_offset, buffer_stride, format);
1096bf215546Sopenharmony_ci
1097bf215546Sopenharmony_ci   /* Because we can't do raster loads/stores of Z/S formats we need to
1098bf215546Sopenharmony_ci    * use a color tile buffer with a compatible RGBA color format instead.
1099bf215546Sopenharmony_ci    * However, when we are uploading a single aspect to a combined
1100bf215546Sopenharmony_ci    * depth/stencil image we have the problem that our tile buffer stores don't
1101bf215546Sopenharmony_ci    * allow us to mask out the other aspect, so we always write all four RGBA
1102bf215546Sopenharmony_ci    * channels to the image and we end up overwriting that other aspect with
1103bf215546Sopenharmony_ci    * undefined values. To work around that, we first load the aspect we are
1104bf215546Sopenharmony_ci    * not copying from the image memory into a proper Z/S tile buffer. Then we
1105bf215546Sopenharmony_ci    * do our store from the color buffer for the aspect we are copying, and
1106bf215546Sopenharmony_ci    * after that, we do another store from the Z/S tile buffer to restore the
1107bf215546Sopenharmony_ci    * other aspect to its original value.
1108bf215546Sopenharmony_ci    */
1109bf215546Sopenharmony_ci   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
1110bf215546Sopenharmony_ci      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
1111bf215546Sopenharmony_ci         emit_image_load(job->device, cl, framebuffer, image,
1112bf215546Sopenharmony_ci                         VK_IMAGE_ASPECT_STENCIL_BIT,
1113bf215546Sopenharmony_ci                         image_layer, imgrsc->mipLevel,
1114bf215546Sopenharmony_ci                         false, false);
1115bf215546Sopenharmony_ci      } else {
1116bf215546Sopenharmony_ci         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
1117bf215546Sopenharmony_ci         emit_image_load(job->device, cl, framebuffer, image,
1118bf215546Sopenharmony_ci                         VK_IMAGE_ASPECT_DEPTH_BIT,
1119bf215546Sopenharmony_ci                         image_layer, imgrsc->mipLevel,
1120bf215546Sopenharmony_ci                         false, false);
1121bf215546Sopenharmony_ci      }
1122bf215546Sopenharmony_ci   }
1123bf215546Sopenharmony_ci
1124bf215546Sopenharmony_ci   cl_emit(cl, END_OF_LOADS, end);
1125bf215546Sopenharmony_ci
1126bf215546Sopenharmony_ci   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);
1127bf215546Sopenharmony_ci
1128bf215546Sopenharmony_ci   /* Store TLB to image */
1129bf215546Sopenharmony_ci   emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask,
1130bf215546Sopenharmony_ci                    image_layer, imgrsc->mipLevel,
1131bf215546Sopenharmony_ci                    false, true, false);
1132bf215546Sopenharmony_ci
1133bf215546Sopenharmony_ci   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
1134bf215546Sopenharmony_ci      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
1135bf215546Sopenharmony_ci         emit_image_store(job->device, cl, framebuffer, image,
1136bf215546Sopenharmony_ci                          VK_IMAGE_ASPECT_STENCIL_BIT,
1137bf215546Sopenharmony_ci                          image_layer, imgrsc->mipLevel,
1138bf215546Sopenharmony_ci                          false, false, false);
1139bf215546Sopenharmony_ci      } else {
1140bf215546Sopenharmony_ci         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
1141bf215546Sopenharmony_ci         emit_image_store(job->device, cl, framebuffer, image,
1142bf215546Sopenharmony_ci                          VK_IMAGE_ASPECT_DEPTH_BIT,
1143bf215546Sopenharmony_ci                          image_layer, imgrsc->mipLevel,
1144bf215546Sopenharmony_ci                          false, false, false);
1145bf215546Sopenharmony_ci      }
1146bf215546Sopenharmony_ci   }
1147bf215546Sopenharmony_ci
1148bf215546Sopenharmony_ci   cl_emit(cl, END_OF_TILE_MARKER, end);
1149bf215546Sopenharmony_ci
1150bf215546Sopenharmony_ci   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);
1151bf215546Sopenharmony_ci
1152bf215546Sopenharmony_ci   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
1153bf215546Sopenharmony_ci      branch.start = tile_list_start;
1154bf215546Sopenharmony_ci      branch.end = v3dv_cl_get_address(cl);
1155bf215546Sopenharmony_ci   }
1156bf215546Sopenharmony_ci}
1157bf215546Sopenharmony_ci
1158bf215546Sopenharmony_cistatic void
1159bf215546Sopenharmony_ciemit_copy_buffer_to_layer(struct v3dv_job *job,
1160bf215546Sopenharmony_ci                          struct v3dv_image *image,
1161bf215546Sopenharmony_ci                          struct v3dv_buffer *buffer,
1162bf215546Sopenharmony_ci                          struct v3dv_meta_framebuffer *framebuffer,
1163bf215546Sopenharmony_ci                          uint32_t layer,
1164bf215546Sopenharmony_ci                          const VkBufferImageCopy2 *region)
1165bf215546Sopenharmony_ci{
1166bf215546Sopenharmony_ci   emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
1167bf215546Sopenharmony_ci                                           layer, region);
1168bf215546Sopenharmony_ci   emit_supertile_coordinates(job, framebuffer);
1169bf215546Sopenharmony_ci}
1170bf215546Sopenharmony_ci
1171bf215546Sopenharmony_civoid
1172bf215546Sopenharmony_civ3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job,
1173bf215546Sopenharmony_ci                                         struct v3dv_image *image,
1174bf215546Sopenharmony_ci                                         struct v3dv_buffer *buffer,
1175bf215546Sopenharmony_ci                                         struct v3dv_meta_framebuffer *framebuffer,
1176bf215546Sopenharmony_ci                                         const VkBufferImageCopy2 *region)
1177bf215546Sopenharmony_ci{
1178bf215546Sopenharmony_ci   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
1179bf215546Sopenharmony_ci   v3dv_return_if_oom(NULL, job);
1180bf215546Sopenharmony_ci
1181bf215546Sopenharmony_ci   emit_frame_setup(job, 0, NULL);
1182bf215546Sopenharmony_ci   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
1183bf215546Sopenharmony_ci      emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
1184bf215546Sopenharmony_ci   cl_emit(rcl, END_OF_RENDERING, end);
1185bf215546Sopenharmony_ci}
1186bf215546Sopenharmony_ci
1187bf215546Sopenharmony_ci/* Figure out a TLB size configuration for a number of pixels to process.
1188bf215546Sopenharmony_ci * Beware that we can't "render" more than MAX_DIMxMAX_DIM pixels in a single
1189bf215546Sopenharmony_ci * job, if the pixel count is larger than this, the caller might need to split
1190bf215546Sopenharmony_ci * the job and call this function multiple times.
1191bf215546Sopenharmony_ci */
1192bf215546Sopenharmony_cistatic void
1193bf215546Sopenharmony_ciframebuffer_size_for_pixel_count(uint32_t num_pixels,
1194bf215546Sopenharmony_ci                                 uint32_t *width,
1195bf215546Sopenharmony_ci                                 uint32_t *height)
1196bf215546Sopenharmony_ci{
1197bf215546Sopenharmony_ci   assert(num_pixels > 0);
1198bf215546Sopenharmony_ci
1199bf215546Sopenharmony_ci   const uint32_t max_dim_pixels = V3D_MAX_IMAGE_DIMENSION;
1200bf215546Sopenharmony_ci   const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;
1201bf215546Sopenharmony_ci
1202bf215546Sopenharmony_ci   uint32_t w, h;
1203bf215546Sopenharmony_ci   if (num_pixels > max_pixels) {
1204bf215546Sopenharmony_ci      w = max_dim_pixels;
1205bf215546Sopenharmony_ci      h = max_dim_pixels;
1206bf215546Sopenharmony_ci   } else {
1207bf215546Sopenharmony_ci      w = num_pixels;
1208bf215546Sopenharmony_ci      h = 1;
1209bf215546Sopenharmony_ci      while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) {
1210bf215546Sopenharmony_ci         w >>= 1;
1211bf215546Sopenharmony_ci         h <<= 1;
1212bf215546Sopenharmony_ci      }
1213bf215546Sopenharmony_ci   }
1214bf215546Sopenharmony_ci   assert(w <= max_dim_pixels && h <= max_dim_pixels);
1215bf215546Sopenharmony_ci   assert(w * h <= num_pixels);
1216bf215546Sopenharmony_ci   assert(w > 0 && h > 0);
1217bf215546Sopenharmony_ci
1218bf215546Sopenharmony_ci   *width = w;
1219bf215546Sopenharmony_ci   *height = h;
1220bf215546Sopenharmony_ci}
1221bf215546Sopenharmony_ci
1222bf215546Sopenharmony_cistruct v3dv_job *
1223bf215546Sopenharmony_civ3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
1224bf215546Sopenharmony_ci                       struct v3dv_bo *dst,
1225bf215546Sopenharmony_ci                       uint32_t dst_offset,
1226bf215546Sopenharmony_ci                       struct v3dv_bo *src,
1227bf215546Sopenharmony_ci                       uint32_t src_offset,
1228bf215546Sopenharmony_ci                       const VkBufferCopy2 *region)
1229bf215546Sopenharmony_ci{
1230bf215546Sopenharmony_ci   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
1231bf215546Sopenharmony_ci   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
1232bf215546Sopenharmony_ci
1233bf215546Sopenharmony_ci   /* Select appropriate pixel format for the copy operation based on the
1234bf215546Sopenharmony_ci    * size to copy and the alignment of the source and destination offsets.
1235bf215546Sopenharmony_ci    */
1236bf215546Sopenharmony_ci   src_offset += region->srcOffset;
1237bf215546Sopenharmony_ci   dst_offset += region->dstOffset;
1238bf215546Sopenharmony_ci   uint32_t item_size = 4;
1239bf215546Sopenharmony_ci   while (item_size > 1 &&
1240bf215546Sopenharmony_ci          (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
1241bf215546Sopenharmony_ci      item_size /= 2;
1242bf215546Sopenharmony_ci   }
1243bf215546Sopenharmony_ci
1244bf215546Sopenharmony_ci   while (item_size > 1 && region->size % item_size != 0)
1245bf215546Sopenharmony_ci      item_size /= 2;
1246bf215546Sopenharmony_ci
1247bf215546Sopenharmony_ci   assert(region->size % item_size == 0);
1248bf215546Sopenharmony_ci   uint32_t num_items = region->size / item_size;
1249bf215546Sopenharmony_ci   assert(num_items > 0);
1250bf215546Sopenharmony_ci
1251bf215546Sopenharmony_ci   uint32_t format;
1252bf215546Sopenharmony_ci   VkFormat vk_format;
1253bf215546Sopenharmony_ci   switch (item_size) {
1254bf215546Sopenharmony_ci   case 4:
1255bf215546Sopenharmony_ci      format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
1256bf215546Sopenharmony_ci      vk_format = VK_FORMAT_R8G8B8A8_UINT;
1257bf215546Sopenharmony_ci      break;
1258bf215546Sopenharmony_ci   case 2:
1259bf215546Sopenharmony_ci      format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
1260bf215546Sopenharmony_ci      vk_format = VK_FORMAT_R8G8_UINT;
1261bf215546Sopenharmony_ci      break;
1262bf215546Sopenharmony_ci   default:
1263bf215546Sopenharmony_ci      format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
1264bf215546Sopenharmony_ci      vk_format = VK_FORMAT_R8_UINT;
1265bf215546Sopenharmony_ci      break;
1266bf215546Sopenharmony_ci   }
1267bf215546Sopenharmony_ci
1268bf215546Sopenharmony_ci   struct v3dv_job *job = NULL;
1269bf215546Sopenharmony_ci   while (num_items > 0) {
1270bf215546Sopenharmony_ci      job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1271bf215546Sopenharmony_ci      if (!job)
1272bf215546Sopenharmony_ci         return NULL;
1273bf215546Sopenharmony_ci
1274bf215546Sopenharmony_ci      uint32_t width, height;
1275bf215546Sopenharmony_ci      framebuffer_size_for_pixel_count(num_items, &width, &height);
1276bf215546Sopenharmony_ci
1277bf215546Sopenharmony_ci      v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);
1278bf215546Sopenharmony_ci
1279bf215546Sopenharmony_ci      struct v3dv_meta_framebuffer framebuffer;
1280bf215546Sopenharmony_ci      v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type,
1281bf215546Sopenharmony_ci                                  &job->frame_tiling);
1282bf215546Sopenharmony_ci
1283bf215546Sopenharmony_ci      v3dX(job_emit_binning_flush)(job);
1284bf215546Sopenharmony_ci
1285bf215546Sopenharmony_ci      v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset,
1286bf215546Sopenharmony_ci                                      &framebuffer, format, item_size);
1287bf215546Sopenharmony_ci
1288bf215546Sopenharmony_ci      v3dv_cmd_buffer_finish_job(cmd_buffer);
1289bf215546Sopenharmony_ci
1290bf215546Sopenharmony_ci      const uint32_t items_copied = width * height;
1291bf215546Sopenharmony_ci      const uint32_t bytes_copied = items_copied * item_size;
1292bf215546Sopenharmony_ci      num_items -= items_copied;
1293bf215546Sopenharmony_ci      src_offset += bytes_copied;
1294bf215546Sopenharmony_ci      dst_offset += bytes_copied;
1295bf215546Sopenharmony_ci   }
1296bf215546Sopenharmony_ci
1297bf215546Sopenharmony_ci   return job;
1298bf215546Sopenharmony_ci}
1299bf215546Sopenharmony_ci
1300bf215546Sopenharmony_civoid
1301bf215546Sopenharmony_civ3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
1302bf215546Sopenharmony_ci                       struct v3dv_bo *bo,
1303bf215546Sopenharmony_ci                       uint32_t offset,
1304bf215546Sopenharmony_ci                       uint32_t size,
1305bf215546Sopenharmony_ci                       uint32_t data)
1306bf215546Sopenharmony_ci{
1307bf215546Sopenharmony_ci   assert(size > 0 && size % 4 == 0);
1308bf215546Sopenharmony_ci   assert(offset + size <= bo->size);
1309bf215546Sopenharmony_ci
1310bf215546Sopenharmony_ci   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
1311bf215546Sopenharmony_ci   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
1312bf215546Sopenharmony_ci   uint32_t num_items = size / 4;
1313bf215546Sopenharmony_ci
1314bf215546Sopenharmony_ci   while (num_items > 0) {
1315bf215546Sopenharmony_ci      struct v3dv_job *job =
1316bf215546Sopenharmony_ci         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
1317bf215546Sopenharmony_ci      if (!job)
1318bf215546Sopenharmony_ci         return;
1319bf215546Sopenharmony_ci
1320bf215546Sopenharmony_ci      uint32_t width, height;
1321bf215546Sopenharmony_ci      framebuffer_size_for_pixel_count(num_items, &width, &height);
1322bf215546Sopenharmony_ci
1323bf215546Sopenharmony_ci      v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);
1324bf215546Sopenharmony_ci
1325bf215546Sopenharmony_ci      struct v3dv_meta_framebuffer framebuffer;
1326bf215546Sopenharmony_ci      v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
1327bf215546Sopenharmony_ci                                  internal_type, &job->frame_tiling);
1328bf215546Sopenharmony_ci
1329bf215546Sopenharmony_ci      v3dX(job_emit_binning_flush)(job);
1330bf215546Sopenharmony_ci
1331bf215546Sopenharmony_ci      v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data);
1332bf215546Sopenharmony_ci
1333bf215546Sopenharmony_ci      v3dv_cmd_buffer_finish_job(cmd_buffer);
1334bf215546Sopenharmony_ci
1335bf215546Sopenharmony_ci      const uint32_t items_copied = width * height;
1336bf215546Sopenharmony_ci      const uint32_t bytes_copied = items_copied * 4;
1337bf215546Sopenharmony_ci      num_items -= items_copied;
1338bf215546Sopenharmony_ci      offset += bytes_copied;
1339bf215546Sopenharmony_ci   }
1340bf215546Sopenharmony_ci}
1341bf215546Sopenharmony_ci
1342bf215546Sopenharmony_civoid
1343bf215546Sopenharmony_civ3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb,
1344bf215546Sopenharmony_ci                            VkFormat vk_format,
1345bf215546Sopenharmony_ci                            uint32_t internal_type,
1346bf215546Sopenharmony_ci                            const struct v3dv_frame_tiling *tiling)
1347bf215546Sopenharmony_ci{
1348bf215546Sopenharmony_ci   fb->internal_type = internal_type;
1349bf215546Sopenharmony_ci
1350bf215546Sopenharmony_ci   /* Supertile coverage always starts at 0,0  */
1351bf215546Sopenharmony_ci   uint32_t supertile_w_in_pixels =
1352bf215546Sopenharmony_ci      tiling->tile_width * tiling->supertile_width;
1353bf215546Sopenharmony_ci   uint32_t supertile_h_in_pixels =
1354bf215546Sopenharmony_ci      tiling->tile_height * tiling->supertile_height;
1355bf215546Sopenharmony_ci
1356bf215546Sopenharmony_ci   fb->min_x_supertile = 0;
1357bf215546Sopenharmony_ci   fb->min_y_supertile = 0;
1358bf215546Sopenharmony_ci   fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
1359bf215546Sopenharmony_ci   fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;
1360bf215546Sopenharmony_ci
1361bf215546Sopenharmony_ci   fb->vk_format = vk_format;
1362bf215546Sopenharmony_ci   fb->format = v3dX(get_format)(vk_format);
1363bf215546Sopenharmony_ci
1364bf215546Sopenharmony_ci   fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
1365bf215546Sopenharmony_ci   if (vk_format_is_depth_or_stencil(vk_format))
1366bf215546Sopenharmony_ci      fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format);
1367bf215546Sopenharmony_ci}
1368