/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 * Copyright 2015-2021 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "util/u_memory.h"
#include "radv_cs.h"
#include "radv_private.h"
#include "sid.h"

static bool
radv_translate_format_to_hw(struct radeon_info *info, VkFormat format, unsigned *hw_fmt,
                            unsigned *hw_type)
{
   const struct util_format_description *desc = vk_format_description(format);
   *hw_fmt = radv_translate_colorformat(format);

   int firstchan;
   for (firstchan = 0; firstchan < 4; firstchan++) {
      if (desc->channel[firstchan].type != UTIL_FORMAT_TYPE_VOID) {
         break;
      }
   }
   if (firstchan == 4 || desc->channel[firstchan].type == UTIL_FORMAT_TYPE_FLOAT) {
      *hw_type = V_028C70_NUMBER_FLOAT;
   } else {
      *hw_type = V_028C70_NUMBER_UNORM;
      if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
         *hw_type = V_028C70_NUMBER_SRGB;
      else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_SIGNED) {
         if (desc->channel[firstchan].pure_integer) {
            *hw_type = V_028C70_NUMBER_SINT;
         } else {
            assert(desc->channel[firstchan].normalized);
            *hw_type = V_028C70_NUMBER_SNORM;
         }
      } else if (desc->channel[firstchan].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         if (desc->channel[firstchan].pure_integer) {
            *hw_type = V_028C70_NUMBER_UINT;
         } else {
            assert(desc->channel[firstchan].normalized);
            *hw_type = V_028C70_NUMBER_UNORM;
         }
      } else {
         return false;
      }
   }
   return true;
}
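
/* Emit an SDMA copy of the image's first plane into the buffer, using the
 * SDMA v4 (GFX9) / v5 (GFX10+) packet formats. Linear images are copied with
 * a single linear copy packet; tiled images use the tiled -> linear
 * sub-window copy packet. Returns false when the copy does not fit the
 * limits of the packets emitted here.
 */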
static bool
radv_sdma_v4_v5_copy_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                                     struct radv_buffer *buffer,
                                     const VkBufferImageCopy2 *region)
{
   assert(image->plane_count == 1);
   struct radv_device *device = cmd_buffer->device;
   unsigned bpp = image->planes[0].surface.bpe;
   uint64_t dst_address = buffer->bo->va;
   uint64_t src_address = image->bindings[0].bo->va + image->planes[0].surface.u.gfx9.surf_offset;
   unsigned src_pitch = image->planes[0].surface.u.gfx9.surf_pitch;
   unsigned copy_width = DIV_ROUND_UP(image->info.width, image->planes[0].surface.blk_w);
   unsigned copy_height = DIV_ROUND_UP(image->info.height, image->planes[0].surface.blk_h);
   bool tmz = false;

   uint32_t ib_pad_dw_mask = cmd_buffer->device->physical_device->rad_info.ib_pad_dw_mask[AMD_IP_SDMA];

   /* Linear -> linear sub-window copy. */
   if (image->planes[0].surface.is_linear) {
      ASSERTED unsigned cdw_max =
         radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, align(8, ib_pad_dw_mask + 1));
      unsigned bytes = src_pitch * copy_height * bpp;

      if (!(bytes < (1u << 22)))
         return false;

      radeon_emit(cmd_buffer->cs, 0x00000000);

      src_address += image->planes[0].surface.u.gfx9.offset[0];

      radeon_emit(cmd_buffer->cs, CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY,
                                                  CIK_SDMA_COPY_SUB_OPCODE_LINEAR, (tmz ? 4 : 0)));
      radeon_emit(cmd_buffer->cs, bytes);
      radeon_emit(cmd_buffer->cs, 0);
      radeon_emit(cmd_buffer->cs, src_address);
      radeon_emit(cmd_buffer->cs, src_address >> 32);
      radeon_emit(cmd_buffer->cs, dst_address);
      radeon_emit(cmd_buffer->cs, dst_address >> 32);

      while (cmd_buffer->cs->cdw & ib_pad_dw_mask)
         radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD);

      assert(cmd_buffer->cs->cdw <= cdw_max);

      return true;
   }
   /* Tiled -> linear sub-window copy. */
   else {
      unsigned tiled_width = copy_width;
      unsigned tiled_height = copy_height;
      unsigned linear_pitch = region->bufferRowLength;
      unsigned linear_slice_pitch = region->bufferRowLength * copy_height;
      uint64_t tiled_address = src_address;
      uint64_t linear_address = dst_address;
      bool is_v5 = device->physical_device->rad_info.gfx_level >= GFX10;
      /* Only SDMA v5 supports DCC. */
      bool dcc = radv_dcc_enabled(image, 0) && is_v5;

      /* Check if everything fits into the bitfields. */
      if (!(tiled_width < (1 << 14) && tiled_height < (1 << 14) && linear_pitch < (1 << 14) &&
            linear_slice_pitch < (1 << 28) && copy_width < (1 << 14) && copy_height < (1 << 14)))
         return false;

      ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs,
                                                     align(15 + dcc * 3, ib_pad_dw_mask + 1));
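
      /* Tiled (source) -> linear (destination) sub-window copy: a leading
       * zero dword, the 14-dword copy packet and, when DCC is enabled, 3
       * extra metadata dwords, matching the space reserved above.
       */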
      radeon_emit(cmd_buffer->cs, 0x00000000);
      radeon_emit(cmd_buffer->cs,
                  CIK_SDMA_PACKET(CIK_SDMA_OPCODE_COPY, CIK_SDMA_COPY_SUB_OPCODE_TILED_SUB_WINDOW,
                                  (tmz ? 4 : 0)) |
                     dcc << 19 | (is_v5 ? 0 : 0 /* tiled->buffer.b.b.last_level */) << 20 |
                     1u << 31);
      radeon_emit(cmd_buffer->cs,
                  (uint32_t)tiled_address | (image->planes[0].surface.tile_swizzle << 8));
      radeon_emit(cmd_buffer->cs, (uint32_t)(tiled_address >> 32));
      radeon_emit(cmd_buffer->cs, 0);
      radeon_emit(cmd_buffer->cs, ((tiled_width - 1) << 16));
      radeon_emit(cmd_buffer->cs, (tiled_height - 1));
      radeon_emit(
         cmd_buffer->cs,
         util_logbase2(bpp) | image->planes[0].surface.u.gfx9.swizzle_mode << 3 |
            image->planes[0].surface.u.gfx9.resource_type << 9 |
            (is_v5 ? 0 /* tiled->buffer.b.b.last_level */ : image->planes[0].surface.u.gfx9.epitch)
               << 16);
      radeon_emit(cmd_buffer->cs, (uint32_t)linear_address);
      radeon_emit(cmd_buffer->cs, (uint32_t)(linear_address >> 32));
      radeon_emit(cmd_buffer->cs, 0);
      radeon_emit(cmd_buffer->cs, ((linear_pitch - 1) << 16));
      radeon_emit(cmd_buffer->cs, linear_slice_pitch - 1);
      radeon_emit(cmd_buffer->cs, (copy_width - 1) | ((copy_height - 1) << 16));
      radeon_emit(cmd_buffer->cs, 0);

      if (dcc) {
         unsigned hw_fmt, hw_type;
         uint64_t md_address = tiled_address + image->planes[0].surface.meta_offset;

         radv_translate_format_to_hw(&device->physical_device->rad_info, image->vk.format,
                                     &hw_fmt, &hw_type);

         /* Add metadata */
         radeon_emit(cmd_buffer->cs, (uint32_t)md_address);
         radeon_emit(cmd_buffer->cs, (uint32_t)(md_address >> 32));
         radeon_emit(cmd_buffer->cs,
                     hw_fmt | vi_alpha_is_on_msb(device, image->vk.format) << 8 | hw_type << 9 |
                        image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size << 24 |
                        V_028C78_MAX_BLOCK_SIZE_256B << 26 | tmz << 29 |
                        image->planes[0].surface.u.gfx9.color.dcc.pipe_aligned << 31);
      }

      while (cmd_buffer->cs->cdw & ib_pad_dw_mask)
         radeon_emit(cmd_buffer->cs, SDMA_NOP_PAD);

      assert(cmd_buffer->cs->cdw <= cdw_max);

      return true;
   }

   return false;
}

bool
radv_sdma_copy_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                     struct radv_buffer *buffer, const VkBufferImageCopy2 *region)
{
   assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9);
   return radv_sdma_v4_v5_copy_image_to_buffer(cmd_buffer, image, buffer, region);
}