1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2010 Jerome Glisse <glisse@freedesktop.org> 3bf215546Sopenharmony_ci * Copyright 2015-2021 Advanced Micro Devices, Inc. 4bf215546Sopenharmony_ci * All Rights Reserved. 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 8bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 9bf215546Sopenharmony_ci * on the rights to use, copy, modify, merge, publish, distribute, sub 10bf215546Sopenharmony_ci * license, and/or sell copies of the Software, and to permit persons to whom 11bf215546Sopenharmony_ci * the Software is furnished to do so, subject to the following conditions: 12bf215546Sopenharmony_ci * 13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 14bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 15bf215546Sopenharmony_ci * Software. 16bf215546Sopenharmony_ci * 17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 20bf215546Sopenharmony_ci * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 21bf215546Sopenharmony_ci * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22bf215546Sopenharmony_ci * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23bf215546Sopenharmony_ci * USE OR OTHER DEALINGS IN THE SOFTWARE. 24bf215546Sopenharmony_ci */ 25bf215546Sopenharmony_ci#include "util/u_memory.h" 26bf215546Sopenharmony_ci#include "radv_cs.h" 27bf215546Sopenharmony_ci#include "radv_private.h" 28bf215546Sopenharmony_ci#include "sid.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_cistatic bool 31bf215546Sopenharmony_ciradv_translate_format_to_hw(struct radeon_info *info, VkFormat format, unsigned *hw_fmt, 32bf215546Sopenharmony_ci unsigned *hw_type) 33bf215546Sopenharmony_ci{ 34bf215546Sopenharmony_ci const struct util_format_description *desc = vk_format_description(format); 35bf215546Sopenharmony_ci *hw_fmt = radv_translate_colorformat(format); 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci int firstchan; 38bf215546Sopenharmony_ci for (firstchan = 0; firstchan < 4; firstchan++) { 39bf215546Sopenharmony_ci if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) { 40bf215546Sopenharmony_ci break; 41bf215546Sopenharmony_ci } 42bf215546Sopenharmony_ci } 43bf215546Sopenharmony_ci if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) { 44bf215546Sopenharmony_ci *hw_type = V_028C70_NUMBER_FLOAT; 45bf215546Sopenharmony_ci } else { 46bf215546Sopenharmony_ci *hw_type = V_028C70_NUMBER_UNORM; 47bf215546Sopenharmony_ci if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB) 48bf215546Sopenharmony_ci *hw_type = V_028C70_NUMBER_SRGB; 49bf215546Sopenharmony_ci else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) { 50bf215546Sopenharmony_ci if (desc->channel[firstchan].pure_integer) { 51bf215546Sopenharmony_ci *hw_type = V_028C70_NUMBER_SINT; 52bf215546Sopenharmony_ci } else { 53bf215546Sopenharmony_ci assert(desc->channel[firstchan].normalized); 54bf215546Sopenharmony_ci *hw_type = V_028C70_NUMBER_SNORM; 55bf215546Sopenharmony_ci } 56bf215546Sopenharmony_ci } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) { 57bf215546Sopenharmony_ci if (desc->channel[firstchan].pure_integer) { 58bf215546Sopenharmony_ci *hw_type = V_028C70_NUMBER_UINT; 59bf215546Sopenharmony_ci } else { 60bf215546Sopenharmony_ci assert(desc->channel[firstchan].normalized); 61bf215546Sopenharmony_ci *hw_type = V_028C70_NUMBER_UNORM; 62bf215546Sopenharmony_ci } 63bf215546Sopenharmony_ci } else { 64bf215546Sopenharmony_ci return false; 65bf215546Sopenharmony_ci } 66bf215546Sopenharmony_ci } 67bf215546Sopenharmony_ci return true; 68bf215546Sopenharmony_ci} 69bf215546Sopenharmony_ci 70bf215546Sopenharmony_cistatic bool 71bf215546Sopenharmony_ciradv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 72bf215546Sopenharmony_ci struct radv_buffer *buffer, 73bf215546Sopenharmony_ci const VkBufferImageCopy2 *region) 74bf215546Sopenharmony_ci{ 75bf215546Sopenharmony_ci assert(image->plane_count == 1); 76bf215546Sopenharmony_ci struct radv_device *device = cmd_buffer->device; 77bf215546Sopenharmony_ci unsigned bpp = image->planes[0].surface.bpe; 78bf215546Sopenharmony_ci uint64_t dst_address = buffer->bo->va; 79bf215546Sopenharmony_ci uint64_t src_address = image->bindings[0].bo->va + image->planes[0].surface.u.gfx9.surf_offset; 80bf215546Sopenharmony_ci unsigned src_pitch = image->planes[0].surface.u.gfx9.surf_pitch; 81bf215546Sopenharmony_ci unsigned copy_width = DIV_ROUND_UP(image->info.width, image->planes[0].surface.blk_w); 82bf215546Sopenharmony_ci unsigned copy_height = DIV_ROUND_UP(image->info.height, image->planes[0].surface.blk_h); 83bf215546Sopenharmony_ci bool tmz = false; 84bf215546Sopenharmony_ci 85bf215546Sopenharmony_ci uint32_t ib_pad_dw_mask = cmd_buffer->device->physical_device->rad_info.ib_pad_dw_mask[AMD_IP_SDMA]; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci /* Linear -> linear sub-window copy. */ 88bf215546Sopenharmony_ci if (image->planes[0].surface.is_linear) { 89bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = 90bf215546Sopenharmony_ci radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, align(8, ib_pad_dw_mask + 1)); 91bf215546Sopenharmony_ci unsigned bytes = src_pitch * copy_height * bpp; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci if (!(bytes < (1u << 22))) 94bf215546Sopenharmony_ci return false; 95bf215546Sopenharmony_ci 96bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0x00000000); 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci src_address += image->planes[0].surface.u.gfx9.offset[0]; 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, 101bf215546Sopenharmony_ci CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0))); 102bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, bytes); 103bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0); 104bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, src_address); 105bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, src_address >> 32); 106bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, dst_address); 107bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, dst_address >> 32); 108bf215546Sopenharmony_ci 109bf215546Sopenharmony_ci while (cmd_buffer->cs->cdw & ib_pad_dw_mask) 110bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD); 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci return true; 115bf215546Sopenharmony_ci } 116bf215546Sopenharmony_ci /* Tiled sub-window copy -> Linear */ 117bf215546Sopenharmony_ci else { 118bf215546Sopenharmony_ci unsigned tiled_width = copy_width; 119bf215546Sopenharmony_ci unsigned tiled_height = copy_height; 120bf215546Sopenharmony_ci unsigned linear_pitch = region->bufferRowLength; 121bf215546Sopenharmony_ci unsigned linear_slice_pitch = region->bufferRowLength * copy_height; 122bf215546Sopenharmony_ci uint64_t tiled_address = src_address; 123bf215546Sopenharmony_ci uint64_t linear_address = dst_address; 124bf215546Sopenharmony_ci bool is_v5 = device->physical_device->rad_info.gfx_level >= GFX10; 125bf215546Sopenharmony_ci /* Only SDMA 5 supports DCC with SDMA */ 126bf215546Sopenharmony_ci bool dcc = radv_dcc_enabled(image, 0) && is_v5; 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci /* Check if everything fits into the bitfields */ 129bf215546Sopenharmony_ci if (!(tiled_width < (1 << 14) && tiled_height < (1 << 14) && linear_pitch < (1 << 14) && 130bf215546Sopenharmony_ci linear_slice_pitch < (1 << 28) && copy_width < (1 << 14) && copy_height < (1 << 14))) 131bf215546Sopenharmony_ci return false; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 134bf215546Sopenharmony_ci align(15 + dcc * 3, ib_pad_dw_mask + 1)); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0x00000000); 137bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 138bf215546Sopenharmony_ci CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW, 139bf215546Sopenharmony_ci (tmz ? 4 : 0)) | 140bf215546Sopenharmony_ci dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 | 141bf215546Sopenharmony_ci 1u << 31); 142bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 143bf215546Sopenharmony_ci (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8)); 144bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, (uint32_t)(tiled_address >> 32)); 145bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0); 146bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ((tiled_width - 1) << 16)); 147bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, (tiled_height - 1)); 148bf215546Sopenharmony_ci radeon_emit( 149bf215546Sopenharmony_ci cmd_buffer->cs, 150bf215546Sopenharmony_ci util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 | 151bf215546Sopenharmony_ci image->planes[0].surface.u.gfx9.resource_type << 9 | 152bf215546Sopenharmony_ci (is_v5 ? 0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch) 153bf215546Sopenharmony_ci << 16); 154bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, (uint32_t)linear_address); 155bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, (uint32_t)(linear_address >> 32)); 156bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0); 157bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, ((linear_pitch - 1) << 16)); 158bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, linear_slice_pitch - 1); 159bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, (copy_width - 1) | ((copy_height - 1) << 16)); 160bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 0); 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_ci if (dcc) { 163bf215546Sopenharmony_ci unsigned hw_fmt, hw_type; 164bf215546Sopenharmony_ci uint64_t md_address = tiled_address + image->planes[0].surface.meta_offset; 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci radv_translate_format_to_hw(&device->physical_device->rad_info, image->vk.format, &hw_fmt, 167bf215546Sopenharmony_ci &hw_type); 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_ci /* Add metadata */ 170bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, (uint32_t)md_address); 171bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, (uint32_t)(md_address >> 32)); 172bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, 173bf215546Sopenharmony_ci hw_fmt | vi_alpha_is_on_msb(device, image->vk.format) << 8 | hw_type << 9 | 174bf215546Sopenharmony_ci image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 | 175bf215546Sopenharmony_ci V_028C78_MAX_BLOCK_SIZE_256B << 26 | tmz << 29 | 176bf215546Sopenharmony_ci image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31); 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci while (cmd_buffer->cs->cdw & ib_pad_dw_mask) 180bf215546Sopenharmony_ci radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD); 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci assert(cmd_buffer->cs->cdw <= cdw_max); 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci return true; 185bf215546Sopenharmony_ci } 186bf215546Sopenharmony_ci 187bf215546Sopenharmony_ci return false; 188bf215546Sopenharmony_ci} 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_cibool 191bf215546Sopenharmony_ciradv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, 192bf215546Sopenharmony_ci struct radv_buffer *buffer, const VkBufferImageCopy2 *region) 193bf215546Sopenharmony_ci{ 194bf215546Sopenharmony_ci assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9); 195bf215546Sopenharmony_ci return radv_sdma_v4_v5_copy_image_to_buffer(cmd_buffer, image, buffer, region); 196bf215546Sopenharmony_ci} 197