/*
 * Copyright © 2021 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "v3dv_meta_common.h"

#include "broadcom/common/v3d_macros.h"
#include "broadcom/common/v3d_tfu.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/compiler/v3d_compiler.h"

struct rcl_clear_info {
   const union v3dv_clear_value *clear_value;
   struct v3dv_image *image;
   VkImageAspectFlags aspects;
   uint32_t level;
};

static struct v3dv_cl *
emit_rcl_prologue(struct v3dv_job *job,
                  struct v3dv_meta_framebuffer *fb,
                  const struct rcl_clear_info *clear_info)
{
   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;
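   /* Reserve room for the fixed prologue packets below (the 200-byte
    * allowance) plus, in the worst case, one supertile coordinates packet
    * per supertile per layer; 256 supertiles per layer is the budget
    * assumed here.
    */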
   v3dv_cl_ensure_space_with_branch(rcl, 200 +
                                    tiling->layers * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));
   if (job->cmd_buffer->state.oom)
      return NULL;

   assert(!tiling->msaa || !tiling->double_buffer);
   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.early_z_disable = true;
      config.image_width_pixels = tiling->width;
      config.image_height_pixels = tiling->height;
      config.number_of_render_targets = 1;
      config.multisample_mode_4x = tiling->msaa;
      config.double_buffer_in_non_ms_mode = tiling->double_buffer;
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
      config.internal_depth_type = fb->internal_depth_type;
   }

   if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
      uint32_t clear_pad = 0;
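      /* When the image's UIF slice is padded well beyond the implicit
       * padding derived from the frame height (15 or more UIF blocks),
       * pass the explicit padded height through the PART3 clear packet
       * emitted below.
       */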
      if (clear_info->image) {
         const struct v3dv_image *image = clear_info->image;
         const struct v3d_resource_slice *slice =
            &image->slices[clear_info->level];
         if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
             slice->tiling == V3D_TILING_UIF_XOR) {
            int uif_block_height = v3d_utile_height(image->cpp) * 2;

            uint32_t implicit_padded_height =
               align(tiling->height, uif_block_height) / uif_block_height;

            if (slice->padded_height_of_output_image_in_uif_blocks -
                implicit_padded_height >= 15) {
               clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
            }
         }
      }

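      /* The clear color is packed as up to 128 bits spread across the
       * PART1/PART2/PART3 packets: PART1 carries the low 56 bits, and the
       * remaining parts are only required for 64-bit and 128-bit internal
       * render target formats.
       */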
      const uint32_t *color = &clear_info->clear_value->color[0];
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
         clear.clear_color_low_32_bits = color[0];
         clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
         clear.render_target_number = 0;
      };

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
            clear.clear_color_mid_low_32_bits =
               ((color[1] >> 24) | (color[2] << 8));
            clear.clear_color_mid_high_24_bits =
               ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
            clear.render_target_number = 0;
         };
      }

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
            clear.uif_padded_height_in_uif_blocks = clear_pad;
            clear.clear_color_high_16_bits = color[3] >> 16;
            clear.render_target_number = 0;
         };
      }
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      rt.render_target_0_internal_bpp = tiling->internal_bpp;
      rt.render_target_0_internal_type = fb->internal_type;
      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
      clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
      clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
   };

   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   return rcl;
}

static void
emit_frame_setup(struct v3dv_job *job,
                 uint32_t min_layer,
                 const union v3dv_clear_value *clear_value)
{
   v3dv_return_if_oom(NULL, job);

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;

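   /* The tile allocation BO holds one 64-byte tile list entry per tile per
    * layer, so the lists for min_layer start at a fixed offset from the
    * base of the BO.
    */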
   const uint32_t tile_alloc_offset =
      64 * min_layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
      config.total_frame_height_in_tiles = tiling->draw_tiles_y;

      config.supertile_width_in_tiles = tiling->supertile_width;
      config.supertile_height_in_tiles = tiling->supertile_height;

      config.total_frame_width_in_supertiles =
         tiling->frame_width_in_supertiles;
      config.total_frame_height_in_supertiles =
         tiling->frame_height_in_supertiles;
   }

   /* Implement GFXH-1742 workaround. Also, if we are clearing we have to
    * do it here.
    */
   for (int i = 0; i < 2; i++) {
      cl_emit(rcl, TILE_COORDINATES, coords);
      cl_emit(rcl, END_OF_LOADS, end);
      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
      /* When using double-buffering, we need to clear both buffers (unless
       * we only have a single tile to render).
       */
      if (clear_value &&
          (i == 0 || v3dv_do_double_initial_tile_clear(tiling))) {
         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
            clear.clear_z_stencil_buffer = true;
            clear.clear_all_render_targets = true;
         }
      }
      cl_emit(rcl, END_OF_TILE_MARKER, end);
   }

   cl_emit(rcl, FLUSH_VCD_CACHE, flush);
}

static void
emit_supertile_coordinates(struct v3dv_job *job,
                           struct v3dv_meta_framebuffer *framebuffer)
{
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl *rcl = &job->rcl;

   const uint32_t min_y = framebuffer->min_y_supertile;
   const uint32_t max_y = framebuffer->max_y_supertile;
   const uint32_t min_x = framebuffer->min_x_supertile;
   const uint32_t max_x = framebuffer->max_x_supertile;

   for (int y = min_y; y <= max_y; y++) {
      for (int x = min_x; x <= max_x; x++) {
         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
            coords.column_number_in_supertiles = x;
            coords.row_number_in_supertiles = y;
         }
      }
   }
}

static void
emit_linear_load(struct v3dv_cl *cl,
                 uint32_t buffer,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 uint32_t stride,
                 uint32_t format)
{
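   /* Since the memory format is raster, the height_in_ub_or_stride field
    * is interpreted as the stride in bytes between rows.
    */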
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = buffer;
      load.address = v3dv_cl_address(bo, offset);
      load.input_image_format = format;
      load.memory_format = V3D_TILING_RASTER;
      load.height_in_ub_or_stride = stride;
      load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static void
emit_linear_store(struct v3dv_cl *cl,
                  uint32_t buffer,
                  struct v3dv_bo *bo,
                  uint32_t offset,
                  uint32_t stride,
                  bool msaa,
                  uint32_t format)
{
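   /* As in emit_linear_load(), the stride field is a raster stride in
    * bytes. For MSAA sources we store every sample rather than resolving
    * down to sample 0.
    */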
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = RENDER_TARGET_0;
      store.address = v3dv_cl_address(bo, offset);
      store.clear_buffer_being_stored = false;
      store.output_image_format = format;
      store.memory_format = V3D_TILING_RASTER;
      store.height_in_ub_or_stride = stride;
      store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
                                   V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

/* This chooses a tile buffer format that is appropriate for the copy
 * operation. Typically, this is the image's render target type; however, if
 * we are copying depth/stencil to/from a buffer the hardware can't do raster
 * loads/stores, so we need to load and store to/from a tile color buffer
 * using a compatible color format.
 */
static uint32_t
choose_tlb_format(struct v3dv_meta_framebuffer *framebuffer,
                  VkImageAspectFlags aspect,
                  bool for_store,
                  bool is_copy_to_buffer,
                  bool is_copy_from_buffer)
{
   if (is_copy_to_buffer || is_copy_from_buffer) {
      switch (framebuffer->vk_format) {
      case VK_FORMAT_D16_UNORM:
         return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
      case VK_FORMAT_D32_SFLOAT:
         return V3D_OUTPUT_IMAGE_FORMAT_R32F;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
         return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         /* When storing the stencil aspect of a combined depth/stencil image
          * to a buffer, the Vulkan spec states that the output buffer must
          * have packed stencil values, so we choose an R8UI format for our
          * store outputs. For the load input we still want RGBA8UI since the
          * source image contains 4 channels (including the 3 channels
          * containing the 24-bit depth value).
          *
          * When loading the stencil aspect of a combined depth/stencil image
          * from a buffer, we read packed 8-bit stencil values from the buffer
          * that we need to put into the LSB of the 32-bit format (the R
          * channel), so we use R8UI. For the store, if we used R8UI then we
          * would write 8-bit stencil values consecutively over depth channels,
          * so we need to use RGBA8UI. This will write each stencil value in
          * its correct position, but will overwrite depth values (channels G,
          * B, A) with undefined values. To fix this, we will have to restore
          * the depth aspect from the Z tile buffer, which we should pre-load
          * from the image before the store.
          */
         if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
            return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
         } else {
            assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
            if (is_copy_to_buffer) {
               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
                                  V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
            } else {
               assert(is_copy_from_buffer);
               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
                                  V3D_OUTPUT_IMAGE_FORMAT_R8UI;
            }
         }
      default: /* Color formats */
         return framebuffer->format->rt_type;
      }
   } else {
      return framebuffer->format->rt_type;
   }
}

static inline bool
format_needs_rb_swap(struct v3dv_device *device,
                     VkFormat format)
{
   const uint8_t *swizzle = v3dv_get_format_swizzle(device, format);
   return v3dv_format_swizzle_needs_rb_swap(swizzle);
}

static inline bool
format_needs_reverse(struct v3dv_device *device,
                     VkFormat format)
{
   const uint8_t *swizzle = v3dv_get_format_swizzle(device, format);
   return v3dv_format_swizzle_needs_reverse(swizzle);
}

static void
emit_image_load(struct v3dv_device *device,
                struct v3dv_cl *cl,
                struct v3dv_meta_framebuffer *framebuffer,
                struct v3dv_image *image,
                VkImageAspectFlags aspect,
                uint32_t layer,
                uint32_t mip_level,
                bool is_copy_to_buffer,
                bool is_copy_from_buffer)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   /* For image to/from buffer copies we always load to and store from RT0,
    * even for depth/stencil aspects, because the hardware can't do raster
    * stores or loads from/to the depth/stencil tile buffers.
    */
   bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                            aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = load_to_color_tlb ?
         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);

      load.address = v3dv_cl_address(image->mem->bo, layer_offset);

      load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
                                                  is_copy_to_buffer,
                                                  is_copy_from_buffer);
      load.memory_format = slice->tiling;

      /* When copying depth/stencil images to a buffer, for D24 formats
       * Vulkan expects the depth value in the LSB bits of each 32-bit pixel.
       * Unfortunately, the hardware seems to put the S8/X8 bits there and
       * the depth bits in the MSB. To work around that we can reverse the
       * channel order and then swap the R/B channels to get what we want.
       *
       * NOTE: reversing and swapping only gets us the behavior we want if
       * the operations happen in that exact order, which seems to be the
       * case when done on the tile buffer load operations. On the store, it
       * seems the order is not the same. The order on the store is probably
       * reversed so that reversing and swapping on both the load and the
       * store preserves the original order of the channels in memory.
       *
       * Notice that we only need to do this when copying to a buffer, where
       * depth and stencil aspects are copied as separate regions and the
       * spec expects them to be tightly packed.
       */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_to_buffer &&
          (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
           (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
            (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         /* This is not a raw data copy (i.e. we are clearing the image),
          * so we need to make sure we respect the format swizzle.
          */
         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
         needs_chan_reverse = format_needs_reverse(device, framebuffer->vk_format);
      }

      load.r_b_swap = needs_rb_swap;
      load.channel_reverse = needs_chan_reverse;

      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         load.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         load.height_in_ub_or_stride = slice->stride;
      }

      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
         load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else
         load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static void
emit_image_store(struct v3dv_device *device,
                 struct v3dv_cl *cl,
                 struct v3dv_meta_framebuffer *framebuffer,
                 struct v3dv_image *image,
                 VkImageAspectFlags aspect,
                 uint32_t layer,
                 uint32_t mip_level,
                 bool is_copy_to_buffer,
                 bool is_copy_from_buffer,
                 bool is_multisample_resolve)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                               aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = store_from_color_tlb ?
         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);

      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = false;

      /* See rationale in emit_image_load() */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_from_buffer &&
          (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
           (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
            (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
         needs_chan_reverse = format_needs_reverse(device, framebuffer->vk_format);
      }

      store.r_b_swap = needs_rb_swap;
      store.channel_reverse = needs_chan_reverse;

      store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
                                                    is_copy_to_buffer,
                                                    is_copy_from_buffer);
      store.memory_format = slice->tiling;
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         store.height_in_ub_or_stride = slice->stride;
      }

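      /* For MSAA destinations store every sample; for a resolve let the TLB
       * average the 4x samples down to one; otherwise store the single
       * sample 0.
       */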
      if (image->vk.samples > VK_SAMPLE_COUNT_1_BIT)
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else if (is_multisample_resolve)
         store.decimate_mode = V3D_DECIMATE_MODE_4X;
      else
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static void
emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
                                        struct v3dv_meta_framebuffer *framebuffer,
                                        struct v3dv_buffer *buffer,
                                        struct v3dv_image *image,
                                        uint32_t layer_offset,
                                        const VkBufferImageCopy2 *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* Load image to TLB */
   assert((image->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->imageSubresource.layerCount) ||
          layer_offset < image->vk.extent.depth);

   const uint32_t image_layer = image->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->imageSubresource.baseArrayLayer + layer_offset :
      region->imageOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, image,
                   region->imageSubresource.aspectMask,
                   image_layer,
                   region->imageSubresource.mipLevel,
                   true, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to buffer */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy from compressed format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));

   /* If we are storing stencil from a combined depth/stencil format the
    * Vulkan spec states that the output buffer must have packed stencil
    * values, where each stencil value is 1 byte.
    */
   uint32_t cpp =
      region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
      1 : image->cpp;
   uint32_t buffer_stride = width * cpp;
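   /* Layers are tightly packed in the buffer one after another, so this
    * layer's data starts a whole image (height * stride) per preceding
    * layer past the region's base offset.
    */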
   uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
                            height * buffer_stride * layer_offset;

   uint32_t format = choose_tlb_format(framebuffer,
                                       region->imageSubresource.aspectMask,
                                       true, true, false);
   bool msaa = image->vk.samples > VK_SAMPLE_COUNT_1_BIT;

   emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
                     buffer_offset, buffer_stride, msaa, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_copy_layer_to_buffer(struct v3dv_job *job,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          struct v3dv_meta_framebuffer *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy2 *region)
{
   emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
                                           image, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
                                         struct v3dv_buffer *buffer,
                                         struct v3dv_image *image,
                                         struct v3dv_meta_framebuffer *framebuffer,
                                         const VkBufferImageCopy2 *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
                                       struct v3dv_meta_framebuffer *framebuffer,
                                       struct v3dv_image *dst,
                                       struct v3dv_image *src,
                                       uint32_t layer_offset,
                                       const VkImageResolve2 *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->srcSubresource.layerCount) ||
          layer_offset < src->vk.extent.depth);

   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer + layer_offset :
      region->srcOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, src,
                   region->srcSubresource.aspectMask,
                   src_layer,
                   region->srcSubresource.mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->dstSubresource.layerCount) ||
          layer_offset < dst->vk.extent.depth);

   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer + layer_offset :
      region->dstOffset.z + layer_offset;

   emit_image_store(job->device, cl, framebuffer, dst,
                    region->dstSubresource.aspectMask,
                    dst_layer,
                    region->dstSubresource.mipLevel,
                    false, false, true);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_resolve_image_layer(struct v3dv_job *job,
                         struct v3dv_image *dst,
                         struct v3dv_image *src,
                         struct v3dv_meta_framebuffer *framebuffer,
                         uint32_t layer,
                         const VkImageResolve2 *region)
{
   emit_resolve_image_layer_per_tile_list(job, framebuffer,
                                          dst, src, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_resolve_image_rcl)(struct v3dv_job *job,
                                  struct v3dv_image *dst,
                                  struct v3dv_image *src,
                                  struct v3dv_meta_framebuffer *framebuffer,
                                  const VkImageResolve2 *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_copy_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *dst,
                               struct v3dv_bo *src,
                               uint32_t dst_offset,
                               uint32_t src_offset,
                               uint32_t stride,
                               uint32_t format)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0,
                     dst, dst_offset, stride, false, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

void
v3dX(meta_emit_copy_buffer)(struct v3dv_job *job,
                            struct v3dv_bo *dst,
                            struct v3dv_bo *src,
                            uint32_t dst_offset,
                            uint32_t src_offset,
                            struct v3dv_meta_framebuffer *framebuffer,
                            uint32_t format,
                            uint32_t item_size)
{
   const uint32_t stride = job->frame_tiling.width * item_size;
   emit_copy_buffer_per_tile_list(job, dst, src,
                                  dst_offset, src_offset,
                                  stride, format);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_copy_buffer_rcl)(struct v3dv_job *job,
                                struct v3dv_bo *dst,
                                struct v3dv_bo *src,
                                uint32_t dst_offset,
                                uint32_t src_offset,
                                struct v3dv_meta_framebuffer *framebuffer,
                                uint32_t format,
                                uint32_t item_size)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);

   v3dX(meta_emit_copy_buffer)(job, dst, src, dst_offset, src_offset,
                               framebuffer, format, item_size);

   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
                                    struct v3dv_meta_framebuffer *framebuffer,
                                    struct v3dv_image *dst,
                                    struct v3dv_image *src,
                                    uint32_t layer_offset,
                                    const VkImageCopy2 *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   assert((src->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->srcSubresource.layerCount) ||
          layer_offset < src->vk.extent.depth);

   const uint32_t src_layer = src->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer + layer_offset :
      region->srcOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, src,
                   region->srcSubresource.aspectMask,
                   src_layer,
                   region->srcSubresource.mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   assert((dst->vk.image_type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->dstSubresource.layerCount) ||
          layer_offset < dst->vk.extent.depth);

   const uint32_t dst_layer = dst->vk.image_type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer + layer_offset :
      region->dstOffset.z + layer_offset;

   emit_image_store(job->device, cl, framebuffer, dst,
                    region->dstSubresource.aspectMask,
                    dst_layer,
                    region->dstSubresource.mipLevel,
                    false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_copy_image_layer(struct v3dv_job *job,
                      struct v3dv_image *dst,
                      struct v3dv_image *src,
                      struct v3dv_meta_framebuffer *framebuffer,
                      uint32_t layer,
                      const VkImageCopy2 *region)
{
   emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_copy_image_rcl)(struct v3dv_job *job,
                               struct v3dv_image *dst,
                               struct v3dv_image *src,
                               struct v3dv_meta_framebuffer *framebuffer,
                               const VkImageCopy2 *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

void
v3dX(meta_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
                        uint32_t dst_bo_handle,
                        uint32_t dst_offset,
                        enum v3d_tiling_mode dst_tiling,
                        uint32_t dst_padded_height_or_stride,
                        uint32_t dst_cpp,
                        uint32_t src_bo_handle,
                        uint32_t src_offset,
                        enum v3d_tiling_mode src_tiling,
                        uint32_t src_padded_height_or_stride,
                        uint32_t src_cpp,
                        uint32_t width,
                        uint32_t height,
                        const struct v3dv_format *format)
{
   struct drm_v3d_submit_tfu tfu = {
      .ios = (height << 16) | width,
      .bo_handles = {
         dst_bo_handle,
         src_bo_handle != dst_bo_handle ? src_bo_handle : 0
      },
   };

   tfu.iia |= src_offset;

   if (src_tiling == V3D_TILING_RASTER) {
      tfu.icfg = V3D33_TFU_ICFG_FORMAT_RASTER << V3D33_TFU_ICFG_FORMAT_SHIFT;
   } else {
      tfu.icfg = (V3D33_TFU_ICFG_FORMAT_LINEARTILE +
                  (src_tiling - V3D_TILING_LINEARTILE)) <<
                  V3D33_TFU_ICFG_FORMAT_SHIFT;
   }
   tfu.icfg |= format->tex_type << V3D33_TFU_ICFG_TTYPE_SHIFT;

   tfu.ioa = dst_offset;

   tfu.ioa |= (V3D33_TFU_IOA_FORMAT_LINEARTILE +
               (dst_tiling - V3D_TILING_LINEARTILE)) <<
               V3D33_TFU_IOA_FORMAT_SHIFT;

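   /* The input stride is expressed differently depending on the source
    * layout: for UIF layouts it is the padded height in UIF blocks (a UIF
    * block is two utiles tall), while for raster it is the number of
    * pixels per line.
    */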
   switch (src_tiling) {
   case V3D_TILING_UIF_NO_XOR:
   case V3D_TILING_UIF_XOR:
      tfu.iis |= src_padded_height_or_stride / (2 * v3d_utile_height(src_cpp));
      break;
   case V3D_TILING_RASTER:
      tfu.iis |= src_padded_height_or_stride / src_cpp;
      break;
   default:
      break;
   }

   /* The TFU can handle raster sources but always produces UIF results */
   assert(dst_tiling != V3D_TILING_RASTER);

   /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
    * OPAD field for the destination (how many extra UIF blocks beyond
    * those necessary to cover the height).
    */
   if (dst_tiling == V3D_TILING_UIF_NO_XOR || dst_tiling == V3D_TILING_UIF_XOR) {
      uint32_t uif_block_h = 2 * v3d_utile_height(dst_cpp);
      uint32_t implicit_padded_height = align(height, uif_block_h);
      uint32_t icfg = (dst_padded_height_or_stride - implicit_padded_height) /
                      uif_block_h;
      tfu.icfg |= icfg << V3D33_TFU_ICFG_OPAD_SHIFT;
   }

   v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
}

static void
emit_clear_image_layer_per_tile_list(struct v3dv_job *job,
                                     struct v3dv_meta_framebuffer *framebuffer,
                                     struct v3dv_image *image,
                                     VkImageAspectFlags aspects,
                                     uint32_t layer,
                                     uint32_t level)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

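   /* A clear needs no tile buffer loads: the tile buffer already holds the
    * clear value programmed in the RCL prologue, so we just store it out
    * to the image below.
    */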
   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_image_store(job->device, cl, framebuffer, image, aspects,
                    layer, level, false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_clear_image_layers(struct v3dv_job *job,
                        struct v3dv_image *image,
                        struct v3dv_meta_framebuffer *framebuffer,
                        VkImageAspectFlags aspects,
                        uint32_t min_layer,
                        uint32_t max_layer,
                        uint32_t level)
{
   for (uint32_t layer = min_layer; layer < max_layer; layer++) {
      emit_clear_image_layer_per_tile_list(job, framebuffer, image, aspects,
                                           layer, level);
      emit_supertile_coordinates(job, framebuffer);
   }
}

void
v3dX(meta_emit_clear_image_rcl)(struct v3dv_job *job,
                                struct v3dv_image *image,
                                struct v3dv_meta_framebuffer *framebuffer,
                                const union v3dv_clear_value *clear_value,
                                VkImageAspectFlags aspects,
                                uint32_t min_layer,
                                uint32_t max_layer,
                                uint32_t level)
{
   const struct rcl_clear_info clear_info = {
      .clear_value = clear_value,
      .image = image,
      .aspects = aspects,
      .level = level,
   };

   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, clear_value);
   emit_clear_image_layers(job, image, framebuffer, aspects,
                           min_layer, max_layer, level);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_fill_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *bo,
                               uint32_t offset,
                               uint32_t stride)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
                     V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_fill_buffer(struct v3dv_job *job,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 struct v3dv_meta_framebuffer *framebuffer)
{
   const uint32_t stride = job->frame_tiling.width * 4;
   emit_fill_buffer_per_tile_list(job, bo, offset, stride);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_fill_buffer_rcl)(struct v3dv_job *job,
                                struct v3dv_bo *bo,
                                uint32_t offset,
                                struct v3dv_meta_framebuffer *framebuffer,
                                uint32_t data)
{
   const union v3dv_clear_value clear_value = {
      .color = { data, 0, 0, 0 },
   };

   const struct rcl_clear_info clear_info = {
      .clear_value = &clear_value,
      .image = NULL,
      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
      .level = 0,
   };

   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, &clear_value);
   emit_fill_buffer(job, bo, offset, framebuffer);
   cl_emit(rcl, END_OF_RENDERING, end);
}


static void
emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
                                        struct v3dv_meta_framebuffer *framebuffer,
                                        struct v3dv_image *image,
                                        struct v3dv_buffer *buffer,
                                        uint32_t layer,
                                        const VkBufferImageCopy2 *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
   assert((image->vk.image_type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
          layer < image->vk.extent.depth);

   /* Load TLB from buffer */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy to compressed format using a compatible format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk.format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk.format));

   uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
                  1 : image->cpp;
   uint32_t buffer_stride = width * cpp;
   uint32_t buffer_offset =
      buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;

   uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
                                       false, false, true);

   uint32_t image_layer = layer + (image->vk.image_type != VK_IMAGE_TYPE_3D ?
                                   imgrsc->baseArrayLayer : region->imageOffset.z);

   emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
                    buffer_offset, buffer_stride, format);

   /* Because we can't do raster loads/stores of Z/S formats we need to
    * use a color tile buffer with a compatible RGBA color format instead.
    * However, when we are uploading a single aspect to a combined
    * depth/stencil image we have the problem that our tile buffer stores
    * don't allow us to mask out the other aspect, so we always write all
    * four RGBA channels to the image and we end up overwriting that other
    * aspect with undefined values. To work around that, we first load the
    * aspect we are not copying from the image memory into a proper Z/S tile
    * buffer. Then we do our store from the color buffer for the aspect we
    * are copying, and after that, we do another store from the Z/S tile
    * buffer to restore the other aspect to its original value.
    */
   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         emit_image_load(job->device, cl, framebuffer, image,
                         VK_IMAGE_ASPECT_STENCIL_BIT,
                         image_layer, imgrsc->mipLevel,
                         false, false);
      } else {
         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
         emit_image_load(job->device, cl, framebuffer, image,
                         VK_IMAGE_ASPECT_DEPTH_BIT,
                         image_layer, imgrsc->mipLevel,
                         false, false);
      }
   }

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to image */
   emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask,
                    image_layer, imgrsc->mipLevel,
                    false, true, false);

   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         emit_image_store(job->device, cl, framebuffer, image,
                          VK_IMAGE_ASPECT_STENCIL_BIT,
                          image_layer, imgrsc->mipLevel,
                          false, false, false);
      } else {
         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
         emit_image_store(job->device, cl, framebuffer, image,
                          VK_IMAGE_ASPECT_DEPTH_BIT,
                          image_layer, imgrsc->mipLevel,
                          false, false, false);
      }
   }

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_copy_buffer_to_layer(struct v3dv_job *job,
                          struct v3dv_image *image,
                          struct v3dv_buffer *buffer,
                          struct v3dv_meta_framebuffer *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy2 *region)
{
   emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
                                           layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(meta_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job,
                                         struct v3dv_image *image,
                                         struct v3dv_buffer *buffer,
                                         struct v3dv_meta_framebuffer *framebuffer,
                                         const VkBufferImageCopy2 *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);
   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Figure out a TLB size configuration for a number of pixels to process.
 * Beware that we can't "render" more than MAX_DIM x MAX_DIM pixels in a
 * single job, so if the pixel count is larger than that, the caller might
 * need to split the operation and call this function multiple times.
 */
static void
framebuffer_size_for_pixel_count(uint32_t num_pixels,
                                 uint32_t *width,
                                 uint32_t *height)
{
   assert(num_pixels > 0);

   const uint32_t max_dim_pixels = V3D_MAX_IMAGE_DIMENSION;
   const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;

   uint32_t w, h;
   if (num_pixels > max_pixels) {
      w = max_dim_pixels;
      h = max_dim_pixels;
   } else {
      w = num_pixels;
      h = 1;
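      /* Fold the num_pixels x 1 strip in half until the width fits the
       * hardware limit and the aspect ratio is reasonable (width no more
       * than twice the height). Folding stops at odd widths, which is fine:
       * we only need w * h <= num_pixels, and callers loop until every
       * item has been processed.
       */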
      while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) {
         w >>= 1;
         h <<= 1;
      }
   }
   assert(w <= max_dim_pixels && h <= max_dim_pixels);
   assert(w * h <= num_pixels);
   assert(w > 0 && h > 0);

   *width = w;
   *height = h;
}

struct v3dv_job *
v3dX(meta_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
                       struct v3dv_bo *dst,
                       uint32_t dst_offset,
                       struct v3dv_bo *src,
                       uint32_t src_offset,
                       const VkBufferCopy2 *region)
{
   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;

   /* Select appropriate pixel format for the copy operation based on the
    * size to copy and the alignment of the source and destination offsets.
    */
   src_offset += region->srcOffset;
   dst_offset += region->dstOffset;
   uint32_t item_size = 4;
   while (item_size > 1 &&
          (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
      item_size /= 2;
   }

   while (item_size > 1 && region->size % item_size != 0)
      item_size /= 2;
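   /* For example, a 6-byte copy between 2-byte-aligned offsets settles on
    * item_size = 2 (RG8UI below), giving 3 items to copy.
    */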

   assert(region->size % item_size == 0);
   uint32_t num_items = region->size / item_size;
   assert(num_items > 0);

   uint32_t format;
   VkFormat vk_format;
   switch (item_size) {
   case 4:
      format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
      vk_format = VK_FORMAT_R8G8B8A8_UINT;
      break;
   case 2:
      format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
      vk_format = VK_FORMAT_R8G8_UINT;
      break;
   default:
      format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
      vk_format = VK_FORMAT_R8_UINT;
      break;
   }

   struct v3dv_job *job = NULL;
   while (num_items > 0) {
      job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
      if (!job)
         return NULL;

      uint32_t width, height;
      framebuffer_size_for_pixel_count(num_items, &width, &height);

      v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);

      struct v3dv_meta_framebuffer framebuffer;
      v3dX(meta_framebuffer_init)(&framebuffer, vk_format, internal_type,
                                  &job->frame_tiling);

      v3dX(job_emit_binning_flush)(job);

      v3dX(meta_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset,
                                      &framebuffer, format, item_size);

      v3dv_cmd_buffer_finish_job(cmd_buffer);

      const uint32_t items_copied = width * height;
      const uint32_t bytes_copied = items_copied * item_size;
      num_items -= items_copied;
      src_offset += bytes_copied;
      dst_offset += bytes_copied;
   }

   return job;
}

void
v3dX(meta_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
                       struct v3dv_bo *bo,
                       uint32_t offset,
                       uint32_t size,
                       uint32_t data)
{
   assert(size > 0 && size % 4 == 0);
   assert(offset + size <= bo->size);

   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
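   /* Each "pixel" of the fill framebuffer covers 4 bytes of the buffer:
    * the 32-bit fill pattern is programmed as the clear color and written
    * out through RGBA8UI raster stores, so we fill in 4-byte items.
    */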
   uint32_t num_items = size / 4;

   while (num_items > 0) {
      struct v3dv_job *job =
         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
      if (!job)
         return;

      uint32_t width, height;
      framebuffer_size_for_pixel_count(num_items, &width, &height);

      v3dv_job_start_frame(job, width, height, 1, true, 1, internal_bpp, false);

      struct v3dv_meta_framebuffer framebuffer;
      v3dX(meta_framebuffer_init)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
                                  internal_type, &job->frame_tiling);

      v3dX(job_emit_binning_flush)(job);

      v3dX(meta_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data);

      v3dv_cmd_buffer_finish_job(cmd_buffer);

      const uint32_t items_copied = width * height;
      const uint32_t bytes_copied = items_copied * 4;
      num_items -= items_copied;
      offset += bytes_copied;
   }
}

void
v3dX(meta_framebuffer_init)(struct v3dv_meta_framebuffer *fb,
                            VkFormat vk_format,
                            uint32_t internal_type,
                            const struct v3dv_frame_tiling *tiling)
{
   fb->internal_type = internal_type;

   /* Supertile coverage always starts at 0,0 */
   uint32_t supertile_w_in_pixels =
      tiling->tile_width * tiling->supertile_width;
   uint32_t supertile_h_in_pixels =
      tiling->tile_height * tiling->supertile_height;

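   /* The supertile range is inclusive: emit_supertile_coordinates() walks
    * min..max, so the maximum index is (size - 1) / supertile_size.
    */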
   fb->min_x_supertile = 0;
   fb->min_y_supertile = 0;
   fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
   fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;

   fb->vk_format = vk_format;
   fb->format = v3dX(get_format)(vk_format);

   fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
   if (vk_format_is_depth_or_stencil(vk_format))
      fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format);
}