/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 * Copyright 2015-2021 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
25bf215546Sopenharmony_ci#include "util/u_memory.h"
26bf215546Sopenharmony_ci#include "radv_cs.h"
27bf215546Sopenharmony_ci#include "radv_private.h"
28bf215546Sopenharmony_ci#include "sid.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_cistatic bool
31bf215546Sopenharmony_ciradv_translate_format_to_hw(struct radeon_info *info, VkFormat format, unsigned *hw_fmt,
32bf215546Sopenharmony_ci                            unsigned *hw_type)
33bf215546Sopenharmony_ci{
34bf215546Sopenharmony_ci   const struct util_format_description *desc = vk_format_description(format);
35bf215546Sopenharmony_ci   *hw_fmt = radv_translate_colorformat(format);
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci   int firstchan;
38bf215546Sopenharmony_ci   for (firstchan = 0; firstchan < 4; firstchan++) {
39bf215546Sopenharmony_ci      if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
40bf215546Sopenharmony_ci         break;
41bf215546Sopenharmony_ci      }
42bf215546Sopenharmony_ci   }
43bf215546Sopenharmony_ci   if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
44bf215546Sopenharmony_ci      *hw_type = V_028C70_NUMBER_FLOAT;
45bf215546Sopenharmony_ci   } else {
46bf215546Sopenharmony_ci      *hw_type = V_028C70_NUMBER_UNORM;
47bf215546Sopenharmony_ci      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
48bf215546Sopenharmony_ci         *hw_type = V_028C70_NUMBER_SRGB;
49bf215546Sopenharmony_ci      else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
50bf215546Sopenharmony_ci         if (desc->channel[firstchan].pure_integer) {
51bf215546Sopenharmony_ci            *hw_type = V_028C70_NUMBER_SINT;
52bf215546Sopenharmony_ci         } else {
53bf215546Sopenharmony_ci            assert(desc->channel[firstchan].normalized);
54bf215546Sopenharmony_ci            *hw_type = V_028C70_NUMBER_SNORM;
55bf215546Sopenharmony_ci         }
56bf215546Sopenharmony_ci      } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
57bf215546Sopenharmony_ci         if (desc->channel[firstchan].pure_integer) {
58bf215546Sopenharmony_ci            *hw_type = V_028C70_NUMBER_UINT;
59bf215546Sopenharmony_ci         } else {
60bf215546Sopenharmony_ci            assert(desc->channel[firstchan].normalized);
61bf215546Sopenharmony_ci            *hw_type = V_028C70_NUMBER_UNORM;
62bf215546Sopenharmony_ci         }
63bf215546Sopenharmony_ci      } else {
64bf215546Sopenharmony_ci         return false;
65bf215546Sopenharmony_ci      }
66bf215546Sopenharmony_ci   }
67bf215546Sopenharmony_ci   return true;
68bf215546Sopenharmony_ci}
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_cistatic bool
71bf215546Sopenharmony_ciradv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
72bf215546Sopenharmony_ci                                     struct radv_buffer *buffer,
73bf215546Sopenharmony_ci                                     const VkBufferImageCopy2 *region)
74bf215546Sopenharmony_ci{
75bf215546Sopenharmony_ci   assert(image->plane_count == 1);
76bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
77bf215546Sopenharmony_ci   unsigned bpp = image->planes[0].surface.bpe;
78bf215546Sopenharmony_ci   uint64_t dst_address = buffer->bo->va;
79bf215546Sopenharmony_ci   uint64_t src_address = image->bindings[0].bo->va + image->planes[0].surface.u.gfx9.surf_offset;
80bf215546Sopenharmony_ci   unsigned src_pitch = image->planes[0].surface.u.gfx9.surf_pitch;
81bf215546Sopenharmony_ci   unsigned copy_width = DIV_ROUND_UP(image->info.width, image->planes[0].surface.blk_w);
82bf215546Sopenharmony_ci   unsigned copy_height = DIV_ROUND_UP(image->info.height, image->planes[0].surface.blk_h);
83bf215546Sopenharmony_ci   bool tmz = false;
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   uint32_t ib_pad_dw_mask = cmd_buffer->device->physical_device->rad_info.ib_pad_dw_mask[AMD_IP_SDMA];
86bf215546Sopenharmony_ci
87bf215546Sopenharmony_ci   /* Linear -> linear sub-window copy. */
88bf215546Sopenharmony_ci   if (image->planes[0].surface.is_linear) {
89bf215546Sopenharmony_ci      ASSERTED unsigned cdw_max =
90bf215546Sopenharmony_ci         radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, align(8, ib_pad_dw_mask + 1));
91bf215546Sopenharmony_ci      unsigned bytes = src_pitch * copy_height * bpp;
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci      if (!(bytes < (1u << 22)))
94bf215546Sopenharmony_ci         return false;
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0x00000000);
97bf215546Sopenharmony_ci
98bf215546Sopenharmony_ci      src_address += image->planes[0].surface.u.gfx9.offset[0];
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
101bf215546Sopenharmony_ci                                                  CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0)));
102bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, bytes);
103bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0);
104bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, src_address);
105bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, src_address >> 32);
106bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, dst_address);
107bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, dst_address >> 32);
108bf215546Sopenharmony_ci
109bf215546Sopenharmony_ci      while (cmd_buffer->cs->cdw & ib_pad_dw_mask)
110bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD);
111bf215546Sopenharmony_ci
112bf215546Sopenharmony_ci      assert(cmd_buffer->cs->cdw <= cdw_max);
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_ci      return true;
115bf215546Sopenharmony_ci   }
116bf215546Sopenharmony_ci   /* Tiled sub-window copy -> Linear */
117bf215546Sopenharmony_ci   else {
118bf215546Sopenharmony_ci      unsigned tiled_width = copy_width;
119bf215546Sopenharmony_ci      unsigned tiled_height = copy_height;
120bf215546Sopenharmony_ci      unsigned linear_pitch = region->bufferRowLength;
121bf215546Sopenharmony_ci      unsigned linear_slice_pitch = region->bufferRowLength * copy_height;
122bf215546Sopenharmony_ci      uint64_t tiled_address = src_address;
123bf215546Sopenharmony_ci      uint64_t linear_address = dst_address;
124bf215546Sopenharmony_ci      bool is_v5 = device->physical_device->rad_info.gfx_level >= GFX10;
125bf215546Sopenharmony_ci      /* Only SDMA 5 supports DCC with SDMA */
126bf215546Sopenharmony_ci      bool dcc = radv_dcc_enabled(image, 0) && is_v5;
127bf215546Sopenharmony_ci
128bf215546Sopenharmony_ci      /* Check if everything fits into the bitfields */
129bf215546Sopenharmony_ci      if (!(tiled_width < (1 << 14) && tiled_height < (1 << 14) && linear_pitch < (1 << 14) &&
130bf215546Sopenharmony_ci            linear_slice_pitch < (1 << 28) && copy_width < (1 << 14) && copy_height < (1 << 14)))
131bf215546Sopenharmony_ci         return false;
132bf215546Sopenharmony_ci
133bf215546Sopenharmony_ci      ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
134bf215546Sopenharmony_ci                                                     align(15 + dcc * 3, ib_pad_dw_mask + 1));
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0x00000000);
137bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs,
138bf215546Sopenharmony_ci                  CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW,
139bf215546Sopenharmony_ci                                  (tmz ? 4 : 0)) |
140bf215546Sopenharmony_ci                     dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 |
141bf215546Sopenharmony_ci                     1u << 31);
142bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs,
143bf215546Sopenharmony_ci                  (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8));
144bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, (uint32_t)(tiled_address >> 32));
145bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0);
146bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ((tiled_width - 1) << 16));
147bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, (tiled_height - 1));
148bf215546Sopenharmony_ci      radeon_emit(
149bf215546Sopenharmony_ci         cmd_buffer->cs,
150bf215546Sopenharmony_ci         util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 |
151bf215546Sopenharmony_ci            image->planes[0].surface.u.gfx9.resource_type << 9 |
152bf215546Sopenharmony_ci            (is_v5 ? 0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch)
153bf215546Sopenharmony_ci               << 16);
154bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, (uint32_t)linear_address);
155bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, (uint32_t)(linear_address >> 32));
156bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0);
157bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ((linear_pitch - 1) << 16));
158bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, linear_slice_pitch - 1);
159bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, (copy_width - 1) | ((copy_height - 1) << 16));
160bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0);
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci      if (dcc) {
163bf215546Sopenharmony_ci         unsigned hw_fmt, hw_type;
164bf215546Sopenharmony_ci         uint64_t md_address = tiled_address + image->planes[0].surface.meta_offset;
165bf215546Sopenharmony_ci
166bf215546Sopenharmony_ci         radv_translate_format_to_hw(&device->physical_device->rad_info, image->vk.format, &hw_fmt,
167bf215546Sopenharmony_ci                                     &hw_type);
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_ci         /* Add metadata */
170bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs, (uint32_t)md_address);
171bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs, (uint32_t)(md_address >> 32));
172bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs,
173bf215546Sopenharmony_ci                     hw_fmt | vi_alpha_is_on_msb(device, image->vk.format) << 8 | hw_type << 9 |
174bf215546Sopenharmony_ci                        image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
175bf215546Sopenharmony_ci                        V_028C78_MAX_BLOCK_SIZE_256B << 26 | tmz << 29 |
176bf215546Sopenharmony_ci                        image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31);
177bf215546Sopenharmony_ci      }
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci      while (cmd_buffer->cs->cdw & ib_pad_dw_mask)
180bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci      assert(cmd_buffer->cs->cdw <= cdw_max);
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci      return true;
185bf215546Sopenharmony_ci   }
186bf215546Sopenharmony_ci
187bf215546Sopenharmony_ci   return false;
188bf215546Sopenharmony_ci}
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_cibool
191bf215546Sopenharmony_ciradv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
192bf215546Sopenharmony_ci                     struct radv_buffer *buffer, const VkBufferImageCopy2 *region)
193bf215546Sopenharmony_ci{
194bf215546Sopenharmony_ci   assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9);
195bf215546Sopenharmony_ci   return radv_sdma_v4_v5_copy_image_to_buffer(cmd_buffer, image, buffer, region);
196bf215546Sopenharmony_ci}
197