1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright 2019-2020 Valve Corporation 3bf215546Sopenharmony_ci * SPDX-License-Identifier: MIT 4bf215546Sopenharmony_ci * 5bf215546Sopenharmony_ci * Authors: 6bf215546Sopenharmony_ci * Jonathan Marek <jonathan@marek.ca> 7bf215546Sopenharmony_ci */ 8bf215546Sopenharmony_ci 9bf215546Sopenharmony_ci#include "tu_clear_blit.h" 10bf215546Sopenharmony_ci 11bf215546Sopenharmony_ci#include "ir3/ir3_nir.h" 12bf215546Sopenharmony_ci 13bf215546Sopenharmony_ci#include "util/format_r11g11b10f.h" 14bf215546Sopenharmony_ci#include "util/format_rgb9e5.h" 15bf215546Sopenharmony_ci#include "util/format_srgb.h" 16bf215546Sopenharmony_ci#include "util/half_float.h" 17bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 18bf215546Sopenharmony_ci 19bf215546Sopenharmony_ci#include "tu_cmd_buffer.h" 20bf215546Sopenharmony_ci#include "tu_cs.h" 21bf215546Sopenharmony_ci#include "tu_formats.h" 22bf215546Sopenharmony_ci#include "tu_image.h" 23bf215546Sopenharmony_ci#include "tu_tracepoints.h" 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_cistatic uint32_t 26bf215546Sopenharmony_citu_pack_float32_for_unorm(float val, int bits) 27bf215546Sopenharmony_ci{ 28bf215546Sopenharmony_ci return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1 << bits) - 1)); 29bf215546Sopenharmony_ci} 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_ci/* r2d_ = BLIT_OP_SCALE operations */ 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cistatic enum a6xx_2d_ifmt 34bf215546Sopenharmony_ciformat_to_ifmt(enum pipe_format format) 35bf215546Sopenharmony_ci{ 36bf215546Sopenharmony_ci if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT || 37bf215546Sopenharmony_ci format == PIPE_FORMAT_Z24X8_UNORM) 38bf215546Sopenharmony_ci return R2D_UNORM8; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci /* get_component_bits doesn't work with depth/stencil formats: */ 41bf215546Sopenharmony_ci if (format == PIPE_FORMAT_Z16_UNORM || format == PIPE_FORMAT_Z32_FLOAT) 42bf215546Sopenharmony_ci return R2D_FLOAT32; 43bf215546Sopenharmony_ci if (format == PIPE_FORMAT_S8_UINT) 44bf215546Sopenharmony_ci return R2D_INT8; 45bf215546Sopenharmony_ci if (format == PIPE_FORMAT_A8_UNORM) 46bf215546Sopenharmony_ci return R2D_UNORM8; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci /* use the size of the red channel to find the corresponding "ifmt" */ 49bf215546Sopenharmony_ci bool is_int = util_format_is_pure_integer(format); 50bf215546Sopenharmony_ci switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { 51bf215546Sopenharmony_ci case 4: case 5: case 8: 52bf215546Sopenharmony_ci return is_int ? R2D_INT8 : R2D_UNORM8; 53bf215546Sopenharmony_ci case 10: case 11: 54bf215546Sopenharmony_ci return is_int ? R2D_INT16 : R2D_FLOAT16; 55bf215546Sopenharmony_ci case 16: 56bf215546Sopenharmony_ci if (util_format_is_float(format)) 57bf215546Sopenharmony_ci return R2D_FLOAT16; 58bf215546Sopenharmony_ci return is_int ? R2D_INT16 : R2D_FLOAT32; 59bf215546Sopenharmony_ci case 32: 60bf215546Sopenharmony_ci return is_int ? R2D_INT32 : R2D_FLOAT32; 61bf215546Sopenharmony_ci default: 62bf215546Sopenharmony_ci unreachable("bad format"); 63bf215546Sopenharmony_ci return 0; 64bf215546Sopenharmony_ci } 65bf215546Sopenharmony_ci} 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_cistatic void 68bf215546Sopenharmony_cir2d_coords(struct tu_cs *cs, 69bf215546Sopenharmony_ci const VkOffset2D *dst, 70bf215546Sopenharmony_ci const VkOffset2D *src, 71bf215546Sopenharmony_ci const VkExtent2D *extent) 72bf215546Sopenharmony_ci{ 73bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 74bf215546Sopenharmony_ci A6XX_GRAS_2D_DST_TL(.x = dst->x, .y = dst->y), 75bf215546Sopenharmony_ci A6XX_GRAS_2D_DST_BR(.x = dst->x + extent->width - 1, .y = dst->y + extent->height - 1)); 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci if (!src) 78bf215546Sopenharmony_ci return; 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 81bf215546Sopenharmony_ci A6XX_GRAS_2D_SRC_TL_X(src->x), 82bf215546Sopenharmony_ci A6XX_GRAS_2D_SRC_BR_X(src->x + extent->width - 1), 83bf215546Sopenharmony_ci A6XX_GRAS_2D_SRC_TL_Y(src->y), 84bf215546Sopenharmony_ci A6XX_GRAS_2D_SRC_BR_Y(src->y + extent->height - 1)); 85bf215546Sopenharmony_ci} 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_cistatic void 88bf215546Sopenharmony_cir2d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val) 89bf215546Sopenharmony_ci{ 90bf215546Sopenharmony_ci uint32_t clear_value[4] = {}; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci switch (format) { 93bf215546Sopenharmony_ci case PIPE_FORMAT_Z24_UNORM_S8_UINT: 94bf215546Sopenharmony_ci case PIPE_FORMAT_Z24X8_UNORM: 95bf215546Sopenharmony_ci /* cleared as r8g8b8a8_unorm using special format */ 96bf215546Sopenharmony_ci clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24); 97bf215546Sopenharmony_ci clear_value[1] = clear_value[0] >> 8; 98bf215546Sopenharmony_ci clear_value[2] = clear_value[0] >> 16; 99bf215546Sopenharmony_ci clear_value[3] = val->depthStencil.stencil; 100bf215546Sopenharmony_ci break; 101bf215546Sopenharmony_ci case PIPE_FORMAT_Z16_UNORM: 102bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT: 103bf215546Sopenharmony_ci /* R2D_FLOAT32 */ 104bf215546Sopenharmony_ci clear_value[0] = fui(val->depthStencil.depth); 105bf215546Sopenharmony_ci break; 106bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT: 107bf215546Sopenharmony_ci clear_value[0] = val->depthStencil.stencil; 108bf215546Sopenharmony_ci break; 109bf215546Sopenharmony_ci case PIPE_FORMAT_R9G9B9E5_FLOAT: 110bf215546Sopenharmony_ci /* cleared as UINT32 */ 111bf215546Sopenharmony_ci clear_value[0] = float3_to_rgb9e5(val->color.float32); 112bf215546Sopenharmony_ci break; 113bf215546Sopenharmony_ci default: 114bf215546Sopenharmony_ci assert(!util_format_is_depth_or_stencil(format)); 115bf215546Sopenharmony_ci const struct util_format_description *desc = util_format_description(format); 116bf215546Sopenharmony_ci enum a6xx_2d_ifmt ifmt = format_to_ifmt(format); 117bf215546Sopenharmony_ci 118bf215546Sopenharmony_ci assert(desc->layout == UTIL_FORMAT_LAYOUT_PLAIN || 119bf215546Sopenharmony_ci format == PIPE_FORMAT_R11G11B10_FLOAT); 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci for (unsigned i = 0; i < desc->nr_channels; i++) { 122bf215546Sopenharmony_ci const struct util_format_channel_description *ch = &desc->channel[i]; 123bf215546Sopenharmony_ci if (ifmt == R2D_UNORM8) { 124bf215546Sopenharmony_ci float linear = val->color.float32[i]; 125bf215546Sopenharmony_ci if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3) 126bf215546Sopenharmony_ci linear = util_format_linear_to_srgb_float(val->color.float32[i]); 127bf215546Sopenharmony_ci 128bf215546Sopenharmony_ci if (ch->type == UTIL_FORMAT_TYPE_SIGNED) 129bf215546Sopenharmony_ci clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f); 130bf215546Sopenharmony_ci else 131bf215546Sopenharmony_ci clear_value[i] = tu_pack_float32_for_unorm(linear, 8); 132bf215546Sopenharmony_ci } else if (ifmt == R2D_FLOAT16) { 133bf215546Sopenharmony_ci clear_value[i] = _mesa_float_to_half(val->color.float32[i]); 134bf215546Sopenharmony_ci } else { 135bf215546Sopenharmony_ci assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 || 136bf215546Sopenharmony_ci ifmt == R2D_INT16 || ifmt == R2D_INT8); 137bf215546Sopenharmony_ci clear_value[i] = val->color.uint32[i]; 138bf215546Sopenharmony_ci } 139bf215546Sopenharmony_ci } 140bf215546Sopenharmony_ci break; 141bf215546Sopenharmony_ci } 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4); 144bf215546Sopenharmony_ci tu_cs_emit_array(cs, clear_value, 4); 145bf215546Sopenharmony_ci} 146bf215546Sopenharmony_ci 147bf215546Sopenharmony_cistatic void 148bf215546Sopenharmony_cifixup_src_format(enum pipe_format *src_format, enum pipe_format dst_format, 149bf215546Sopenharmony_ci enum a6xx_format *fmt) 150bf215546Sopenharmony_ci{ 151bf215546Sopenharmony_ci /* When blitting S8 -> D24S8 or vice versa, we have to override S8, which 152bf215546Sopenharmony_ci * is normally R8_UINT for sampling/blitting purposes, to a unorm format. 153bf215546Sopenharmony_ci * We also have to move stencil, which is normally in the .w channel, into 154bf215546Sopenharmony_ci * the right channel. Reintepreting the S8 texture as A8_UNORM solves both 155bf215546Sopenharmony_ci * problems, and avoids using a swap, which seems to sometimes not work 156bf215546Sopenharmony_ci * with a D24S8 source, or a texture swizzle which is only supported with 157bf215546Sopenharmony_ci * the 3d path. Sometimes this blit happens on already-constructed 158bf215546Sopenharmony_ci * fdl6_view's, e.g. for sysmem resolves, so this has to happen as a fixup. 159bf215546Sopenharmony_ci */ 160bf215546Sopenharmony_ci if (*src_format == PIPE_FORMAT_S8_UINT && 161bf215546Sopenharmony_ci (dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || 162bf215546Sopenharmony_ci dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)) { 163bf215546Sopenharmony_ci *fmt = FMT6_A8_UNORM; 164bf215546Sopenharmony_ci *src_format = PIPE_FORMAT_A8_UNORM; 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci} 167bf215546Sopenharmony_ci 168bf215546Sopenharmony_cistatic void 169bf215546Sopenharmony_cifixup_dst_format(enum pipe_format src_format, enum pipe_format *dst_format, 170bf215546Sopenharmony_ci enum a6xx_format *fmt) 171bf215546Sopenharmony_ci{ 172bf215546Sopenharmony_ci if (*dst_format == PIPE_FORMAT_S8_UINT && 173bf215546Sopenharmony_ci (src_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || 174bf215546Sopenharmony_ci src_format == PIPE_FORMAT_Z24_UNORM_S8_UINT_AS_R8G8B8A8)) { 175bf215546Sopenharmony_ci *dst_format = PIPE_FORMAT_A8_UNORM; 176bf215546Sopenharmony_ci *fmt = FMT6_A8_UNORM; 177bf215546Sopenharmony_ci } 178bf215546Sopenharmony_ci} 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_cistatic void 181bf215546Sopenharmony_cir2d_src(struct tu_cmd_buffer *cmd, 182bf215546Sopenharmony_ci struct tu_cs *cs, 183bf215546Sopenharmony_ci const struct fdl6_view *iview, 184bf215546Sopenharmony_ci uint32_t layer, 185bf215546Sopenharmony_ci VkFilter filter, 186bf215546Sopenharmony_ci enum pipe_format dst_format) 187bf215546Sopenharmony_ci{ 188bf215546Sopenharmony_ci uint32_t src_info = iview->SP_PS_2D_SRC_INFO; 189bf215546Sopenharmony_ci if (filter != VK_FILTER_NEAREST) 190bf215546Sopenharmony_ci src_info |= A6XX_SP_PS_2D_SRC_INFO_FILTER; 191bf215546Sopenharmony_ci 192bf215546Sopenharmony_ci enum a6xx_format fmt = (src_info & A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK); 193bf215546Sopenharmony_ci enum pipe_format src_format = iview->format; 194bf215546Sopenharmony_ci fixup_src_format(&src_format, dst_format, &fmt); 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci src_info = 197bf215546Sopenharmony_ci (src_info & ~A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT__MASK) | 198bf215546Sopenharmony_ci A6XX_SP_PS_2D_SRC_INFO_COLOR_FORMAT(fmt); 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5); 201bf215546Sopenharmony_ci tu_cs_emit(cs, src_info); 202bf215546Sopenharmony_ci tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE); 203bf215546Sopenharmony_ci tu_cs_image_ref_2d(cs, iview, layer, true); 204bf215546Sopenharmony_ci 205bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3); 206bf215546Sopenharmony_ci tu_cs_image_flag_ref(cs, iview, layer); 207bf215546Sopenharmony_ci} 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_cistatic void 210bf215546Sopenharmony_cir2d_src_depth(struct tu_cmd_buffer *cmd, 211bf215546Sopenharmony_ci struct tu_cs *cs, 212bf215546Sopenharmony_ci const struct tu_image_view *iview, 213bf215546Sopenharmony_ci uint32_t layer, 214bf215546Sopenharmony_ci VkFilter filter) 215bf215546Sopenharmony_ci{ 216bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5); 217bf215546Sopenharmony_ci tu_cs_emit(cs, tu_image_view_depth(iview, SP_PS_2D_SRC_INFO)); 218bf215546Sopenharmony_ci tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE); 219bf215546Sopenharmony_ci tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer); 220bf215546Sopenharmony_ci /* SP_PS_2D_SRC_PITCH has shifted pitch field */ 221bf215546Sopenharmony_ci tu_cs_emit(cs, iview->depth_PITCH << 9); 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3); 224bf215546Sopenharmony_ci tu_cs_image_flag_ref(cs, &iview->view, layer); 225bf215546Sopenharmony_ci} 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_cistatic void 228bf215546Sopenharmony_cir2d_src_stencil(struct tu_cmd_buffer *cmd, 229bf215546Sopenharmony_ci struct tu_cs *cs, 230bf215546Sopenharmony_ci const struct tu_image_view *iview, 231bf215546Sopenharmony_ci uint32_t layer, 232bf215546Sopenharmony_ci VkFilter filter) 233bf215546Sopenharmony_ci{ 234bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5); 235bf215546Sopenharmony_ci tu_cs_emit(cs, tu_image_view_stencil(iview, SP_PS_2D_SRC_INFO) & ~A6XX_SP_PS_2D_SRC_INFO_FLAGS); 236bf215546Sopenharmony_ci tu_cs_emit(cs, iview->view.SP_PS_2D_SRC_SIZE); 237bf215546Sopenharmony_ci tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer); 238bf215546Sopenharmony_ci /* SP_PS_2D_SRC_PITCH has shifted pitch field */ 239bf215546Sopenharmony_ci tu_cs_emit(cs, iview->stencil_PITCH << 9); 240bf215546Sopenharmony_ci} 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_cistatic void 243bf215546Sopenharmony_cir2d_src_buffer(struct tu_cmd_buffer *cmd, 244bf215546Sopenharmony_ci struct tu_cs *cs, 245bf215546Sopenharmony_ci enum pipe_format format, 246bf215546Sopenharmony_ci uint64_t va, uint32_t pitch, 247bf215546Sopenharmony_ci uint32_t width, uint32_t height, 248bf215546Sopenharmony_ci enum pipe_format dst_format) 249bf215546Sopenharmony_ci{ 250bf215546Sopenharmony_ci struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR); 251bf215546Sopenharmony_ci enum a6xx_format color_format = fmt.fmt; 252bf215546Sopenharmony_ci fixup_src_format(&format, dst_format, &color_format); 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 255bf215546Sopenharmony_ci A6XX_SP_PS_2D_SRC_INFO( 256bf215546Sopenharmony_ci .color_format = color_format, 257bf215546Sopenharmony_ci .color_swap = fmt.swap, 258bf215546Sopenharmony_ci .srgb = util_format_is_srgb(format), 259bf215546Sopenharmony_ci .unk20 = 1, 260bf215546Sopenharmony_ci .unk22 = 1), 261bf215546Sopenharmony_ci A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height), 262bf215546Sopenharmony_ci A6XX_SP_PS_2D_SRC(.qword = va), 263bf215546Sopenharmony_ci A6XX_SP_PS_2D_SRC_PITCH(.pitch = pitch)); 264bf215546Sopenharmony_ci} 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_cistatic void 267bf215546Sopenharmony_cir2d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, 268bf215546Sopenharmony_ci enum pipe_format src_format) 269bf215546Sopenharmony_ci{ 270bf215546Sopenharmony_ci uint32_t dst_info = iview->RB_2D_DST_INFO; 271bf215546Sopenharmony_ci enum a6xx_format fmt = dst_info & A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK; 272bf215546Sopenharmony_ci enum pipe_format dst_format = iview->format; 273bf215546Sopenharmony_ci fixup_dst_format(src_format, &dst_format, &fmt); 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci dst_info = 276bf215546Sopenharmony_ci (dst_info & ~A6XX_RB_2D_DST_INFO_COLOR_FORMAT__MASK) | fmt; 277bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); 278bf215546Sopenharmony_ci tu_cs_emit(cs, dst_info); 279bf215546Sopenharmony_ci tu_cs_image_ref_2d(cs, iview, layer, false); 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3); 282bf215546Sopenharmony_ci tu_cs_image_flag_ref(cs, iview, layer); 283bf215546Sopenharmony_ci} 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_cistatic void 286bf215546Sopenharmony_cir2d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) 287bf215546Sopenharmony_ci{ 288bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); 289bf215546Sopenharmony_ci tu_cs_emit(cs, tu_image_view_depth(iview, RB_2D_DST_INFO)); 290bf215546Sopenharmony_ci tu_cs_emit_qw(cs, iview->depth_base_addr + iview->depth_layer_size * layer); 291bf215546Sopenharmony_ci tu_cs_emit(cs, iview->depth_PITCH); 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3); 294bf215546Sopenharmony_ci tu_cs_image_flag_ref(cs, &iview->view, layer); 295bf215546Sopenharmony_ci} 296bf215546Sopenharmony_ci 297bf215546Sopenharmony_cistatic void 298bf215546Sopenharmony_cir2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) 299bf215546Sopenharmony_ci{ 300bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4); 301bf215546Sopenharmony_ci tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS); 302bf215546Sopenharmony_ci tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer); 303bf215546Sopenharmony_ci tu_cs_emit(cs, iview->stencil_PITCH); 304bf215546Sopenharmony_ci} 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_cistatic void 307bf215546Sopenharmony_cir2d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch, 308bf215546Sopenharmony_ci enum pipe_format src_format) 309bf215546Sopenharmony_ci{ 310bf215546Sopenharmony_ci struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR); 311bf215546Sopenharmony_ci enum a6xx_format color_fmt = fmt.fmt; 312bf215546Sopenharmony_ci fixup_dst_format(src_format, &format, &color_fmt); 313bf215546Sopenharmony_ci fmt.fmt = color_fmt; 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 316bf215546Sopenharmony_ci A6XX_RB_2D_DST_INFO( 317bf215546Sopenharmony_ci .color_format = fmt.fmt, 318bf215546Sopenharmony_ci .color_swap = fmt.swap, 319bf215546Sopenharmony_ci .srgb = util_format_is_srgb(format)), 320bf215546Sopenharmony_ci A6XX_RB_2D_DST(.qword = va), 321bf215546Sopenharmony_ci A6XX_RB_2D_DST_PITCH(pitch)); 322bf215546Sopenharmony_ci} 323bf215546Sopenharmony_ci 324bf215546Sopenharmony_cistatic void 325bf215546Sopenharmony_cir2d_setup_common(struct tu_cmd_buffer *cmd, 326bf215546Sopenharmony_ci struct tu_cs *cs, 327bf215546Sopenharmony_ci enum pipe_format src_format, 328bf215546Sopenharmony_ci enum pipe_format dst_format, 329bf215546Sopenharmony_ci VkImageAspectFlags aspect_mask, 330bf215546Sopenharmony_ci unsigned blit_param, 331bf215546Sopenharmony_ci bool clear, 332bf215546Sopenharmony_ci bool ubwc, 333bf215546Sopenharmony_ci bool scissor) 334bf215546Sopenharmony_ci{ 335bf215546Sopenharmony_ci enum a6xx_format fmt = tu6_base_format(dst_format); 336bf215546Sopenharmony_ci fixup_dst_format(src_format, &dst_format, &fmt); 337bf215546Sopenharmony_ci enum a6xx_2d_ifmt ifmt = format_to_ifmt(dst_format); 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci uint32_t unknown_8c01 = 0; 340bf215546Sopenharmony_ci 341bf215546Sopenharmony_ci if ((dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || 342bf215546Sopenharmony_ci dst_format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) { 343bf215546Sopenharmony_ci fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci /* note: the only format with partial clearing is D24S8 */ 347bf215546Sopenharmony_ci if (dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { 348bf215546Sopenharmony_ci /* preserve stencil channel */ 349bf215546Sopenharmony_ci if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) 350bf215546Sopenharmony_ci unknown_8c01 = 0x08000041; 351bf215546Sopenharmony_ci /* preserve depth channels */ 352bf215546Sopenharmony_ci if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) 353bf215546Sopenharmony_ci unknown_8c01 = 0x00084001; 354bf215546Sopenharmony_ci } 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_UNKNOWN_8C01, 1); 357bf215546Sopenharmony_ci tu_cs_emit(cs, unknown_8c01); 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL( 360bf215546Sopenharmony_ci .scissor = scissor, 361bf215546Sopenharmony_ci .rotate = blit_param, 362bf215546Sopenharmony_ci .solid_color = clear, 363bf215546Sopenharmony_ci .d24s8 = fmt == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear, 364bf215546Sopenharmony_ci .color_format = fmt, 365bf215546Sopenharmony_ci .mask = 0xf, 366bf215546Sopenharmony_ci .ifmt = util_format_is_srgb(dst_format) ? R2D_UNORM8_SRGB : ifmt, 367bf215546Sopenharmony_ci ).value; 368bf215546Sopenharmony_ci 369bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1); 370bf215546Sopenharmony_ci tu_cs_emit(cs, blit_cntl); 371bf215546Sopenharmony_ci 372bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1); 373bf215546Sopenharmony_ci tu_cs_emit(cs, blit_cntl); 374bf215546Sopenharmony_ci 375bf215546Sopenharmony_ci if (fmt == FMT6_10_10_10_2_UNORM_DEST) 376bf215546Sopenharmony_ci fmt = FMT6_16_16_16_16_FLOAT; 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_2D_DST_FORMAT( 379bf215546Sopenharmony_ci .sint = util_format_is_pure_sint(dst_format), 380bf215546Sopenharmony_ci .uint = util_format_is_pure_uint(dst_format), 381bf215546Sopenharmony_ci .color_format = fmt, 382bf215546Sopenharmony_ci .srgb = util_format_is_srgb(dst_format), 383bf215546Sopenharmony_ci .mask = 0xf)); 384bf215546Sopenharmony_ci} 385bf215546Sopenharmony_ci 386bf215546Sopenharmony_cistatic void 387bf215546Sopenharmony_cir2d_setup(struct tu_cmd_buffer *cmd, 388bf215546Sopenharmony_ci struct tu_cs *cs, 389bf215546Sopenharmony_ci enum pipe_format src_format, 390bf215546Sopenharmony_ci enum pipe_format dst_format, 391bf215546Sopenharmony_ci VkImageAspectFlags aspect_mask, 392bf215546Sopenharmony_ci unsigned blit_param, 393bf215546Sopenharmony_ci bool clear, 394bf215546Sopenharmony_ci bool ubwc, 395bf215546Sopenharmony_ci VkSampleCountFlagBits samples) 396bf215546Sopenharmony_ci{ 397bf215546Sopenharmony_ci assert(samples == VK_SAMPLE_COUNT_1_BIT); 398bf215546Sopenharmony_ci 399bf215546Sopenharmony_ci if (!cmd->state.pass) { 400bf215546Sopenharmony_ci tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); 401bf215546Sopenharmony_ci } 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci r2d_setup_common(cmd, cs, src_format, dst_format, aspect_mask, blit_param, clear, ubwc, false); 404bf215546Sopenharmony_ci} 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_cistatic void 407bf215546Sopenharmony_cir2d_teardown(struct tu_cmd_buffer *cmd, 408bf215546Sopenharmony_ci struct tu_cs *cs) 409bf215546Sopenharmony_ci{ 410bf215546Sopenharmony_ci /* nothing to do here */ 411bf215546Sopenharmony_ci} 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_cistatic void 414bf215546Sopenharmony_cir2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs) 415bf215546Sopenharmony_ci{ 416bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_BLIT, 1); 417bf215546Sopenharmony_ci tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); 418bf215546Sopenharmony_ci} 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci/* r3d_ = shader path operations */ 421bf215546Sopenharmony_ci 422bf215546Sopenharmony_cistatic nir_ssa_def * 423bf215546Sopenharmony_ciload_const(nir_builder *b, unsigned base, unsigned components) 424bf215546Sopenharmony_ci{ 425bf215546Sopenharmony_ci return nir_load_uniform(b, components, 32, nir_imm_int(b, 0), 426bf215546Sopenharmony_ci .base = base); 427bf215546Sopenharmony_ci} 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_cistatic nir_shader * 430bf215546Sopenharmony_cibuild_blit_vs_shader(void) 431bf215546Sopenharmony_ci{ 432bf215546Sopenharmony_ci nir_builder _b = 433bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "blit vs"); 434bf215546Sopenharmony_ci nir_builder *b = &_b; 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci nir_variable *out_pos = 437bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), 438bf215546Sopenharmony_ci "gl_Position"); 439bf215546Sopenharmony_ci out_pos->data.location = VARYING_SLOT_POS; 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci nir_ssa_def *vert0_pos = load_const(b, 0, 2); 442bf215546Sopenharmony_ci nir_ssa_def *vert1_pos = load_const(b, 4, 2); 443bf215546Sopenharmony_ci nir_ssa_def *vertex = nir_load_vertex_id(b); 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_ci nir_ssa_def *pos = nir_bcsel(b, nir_i2b1(b, vertex), vert1_pos, vert0_pos); 446bf215546Sopenharmony_ci pos = nir_vec4(b, nir_channel(b, pos, 0), 447bf215546Sopenharmony_ci nir_channel(b, pos, 1), 448bf215546Sopenharmony_ci nir_imm_float(b, 0.0), 449bf215546Sopenharmony_ci nir_imm_float(b, 1.0)); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci nir_store_var(b, out_pos, pos, 0xf); 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci nir_variable *out_coords = 454bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_out, glsl_vec_type(3), 455bf215546Sopenharmony_ci "coords"); 456bf215546Sopenharmony_ci out_coords->data.location = VARYING_SLOT_VAR0; 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_ci nir_ssa_def *vert0_coords = load_const(b, 2, 2); 459bf215546Sopenharmony_ci nir_ssa_def *vert1_coords = load_const(b, 6, 2); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci /* Only used with "z scale" blit path which uses a 3d texture */ 462bf215546Sopenharmony_ci nir_ssa_def *z_coord = load_const(b, 8, 1); 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_ci nir_ssa_def *coords = nir_bcsel(b, nir_i2b1(b, vertex), vert1_coords, vert0_coords); 465bf215546Sopenharmony_ci coords = nir_vec3(b, nir_channel(b, coords, 0), nir_channel(b, coords, 1), 466bf215546Sopenharmony_ci z_coord); 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci nir_store_var(b, out_coords, coords, 0x7); 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci return b->shader; 471bf215546Sopenharmony_ci} 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_cistatic nir_shader * 474bf215546Sopenharmony_cibuild_clear_vs_shader(void) 475bf215546Sopenharmony_ci{ 476bf215546Sopenharmony_ci nir_builder _b = 477bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "blit vs"); 478bf215546Sopenharmony_ci nir_builder *b = &_b; 479bf215546Sopenharmony_ci 480bf215546Sopenharmony_ci nir_variable *out_pos = 481bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), 482bf215546Sopenharmony_ci "gl_Position"); 483bf215546Sopenharmony_ci out_pos->data.location = VARYING_SLOT_POS; 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci nir_ssa_def *vert0_pos = load_const(b, 0, 2); 486bf215546Sopenharmony_ci nir_ssa_def *vert1_pos = load_const(b, 4, 2); 487bf215546Sopenharmony_ci /* c0.z is used to clear depth */ 488bf215546Sopenharmony_ci nir_ssa_def *depth = load_const(b, 2, 1); 489bf215546Sopenharmony_ci nir_ssa_def *vertex = nir_load_vertex_id(b); 490bf215546Sopenharmony_ci 491bf215546Sopenharmony_ci nir_ssa_def *pos = nir_bcsel(b, nir_i2b1(b, vertex), vert1_pos, vert0_pos); 492bf215546Sopenharmony_ci pos = nir_vec4(b, nir_channel(b, pos, 0), 493bf215546Sopenharmony_ci nir_channel(b, pos, 1), 494bf215546Sopenharmony_ci depth, nir_imm_float(b, 1.0)); 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci nir_store_var(b, out_pos, pos, 0xf); 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci nir_variable *out_layer = 499bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_out, glsl_uint_type(), 500bf215546Sopenharmony_ci "gl_Layer"); 501bf215546Sopenharmony_ci out_layer->data.location = VARYING_SLOT_LAYER; 502bf215546Sopenharmony_ci nir_ssa_def *layer = load_const(b, 3, 1); 503bf215546Sopenharmony_ci nir_store_var(b, out_layer, layer, 1); 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci return b->shader; 506bf215546Sopenharmony_ci} 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_cistatic nir_shader * 509bf215546Sopenharmony_cibuild_blit_fs_shader(bool zscale) 510bf215546Sopenharmony_ci{ 511bf215546Sopenharmony_ci nir_builder _b = 512bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, 513bf215546Sopenharmony_ci zscale ? "zscale blit fs" : "blit fs"); 514bf215546Sopenharmony_ci nir_builder *b = &_b; 515bf215546Sopenharmony_ci 516bf215546Sopenharmony_ci nir_variable *out_color = 517bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), 518bf215546Sopenharmony_ci "color0"); 519bf215546Sopenharmony_ci out_color->data.location = FRAG_RESULT_DATA0; 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci unsigned coord_components = zscale ? 3 : 2; 522bf215546Sopenharmony_ci nir_variable *in_coords = 523bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_in, 524bf215546Sopenharmony_ci glsl_vec_type(coord_components), 525bf215546Sopenharmony_ci "coords"); 526bf215546Sopenharmony_ci in_coords->data.location = VARYING_SLOT_VAR0; 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci nir_tex_instr *tex = nir_tex_instr_create(b->shader, 1); 529bf215546Sopenharmony_ci /* Note: since we're just copying data, we rely on the HW ignoring the 530bf215546Sopenharmony_ci * dest_type. 531bf215546Sopenharmony_ci */ 532bf215546Sopenharmony_ci tex->dest_type = nir_type_int32; 533bf215546Sopenharmony_ci tex->is_array = false; 534bf215546Sopenharmony_ci tex->is_shadow = false; 535bf215546Sopenharmony_ci tex->sampler_dim = zscale ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D; 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci tex->texture_index = 0; 538bf215546Sopenharmony_ci tex->sampler_index = 0; 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci b->shader->info.num_textures = 1; 541bf215546Sopenharmony_ci BITSET_SET(b->shader->info.textures_used, 0); 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci tex->src[0].src_type = nir_tex_src_coord; 544bf215546Sopenharmony_ci tex->src[0].src = nir_src_for_ssa(nir_load_var(b, in_coords)); 545bf215546Sopenharmony_ci tex->coord_components = coord_components; 546bf215546Sopenharmony_ci 547bf215546Sopenharmony_ci nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 548bf215546Sopenharmony_ci nir_builder_instr_insert(b, &tex->instr); 549bf215546Sopenharmony_ci 550bf215546Sopenharmony_ci nir_store_var(b, out_color, &tex->dest.ssa, 0xf); 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci return b->shader; 553bf215546Sopenharmony_ci} 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci/* We can only read multisample textures via txf_ms, so we need a separate 556bf215546Sopenharmony_ci * variant for them. 557bf215546Sopenharmony_ci */ 558bf215546Sopenharmony_cistatic nir_shader * 559bf215546Sopenharmony_cibuild_ms_copy_fs_shader(void) 560bf215546Sopenharmony_ci{ 561bf215546Sopenharmony_ci nir_builder _b = 562bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, 563bf215546Sopenharmony_ci "multisample copy fs"); 564bf215546Sopenharmony_ci nir_builder *b = &_b; 565bf215546Sopenharmony_ci 566bf215546Sopenharmony_ci nir_variable *out_color = 567bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), 568bf215546Sopenharmony_ci "color0"); 569bf215546Sopenharmony_ci out_color->data.location = FRAG_RESULT_DATA0; 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci nir_variable *in_coords = 572bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_in, 573bf215546Sopenharmony_ci glsl_vec_type(2), 574bf215546Sopenharmony_ci "coords"); 575bf215546Sopenharmony_ci in_coords->data.location = VARYING_SLOT_VAR0; 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci nir_tex_instr *tex = nir_tex_instr_create(b->shader, 2); 578bf215546Sopenharmony_ci 579bf215546Sopenharmony_ci tex->op = nir_texop_txf_ms; 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_ci /* Note: since we're just copying data, we rely on the HW ignoring the 582bf215546Sopenharmony_ci * dest_type. 583bf215546Sopenharmony_ci */ 584bf215546Sopenharmony_ci tex->dest_type = nir_type_int32; 585bf215546Sopenharmony_ci tex->is_array = false; 586bf215546Sopenharmony_ci tex->is_shadow = false; 587bf215546Sopenharmony_ci tex->sampler_dim = GLSL_SAMPLER_DIM_MS; 588bf215546Sopenharmony_ci 589bf215546Sopenharmony_ci tex->texture_index = 0; 590bf215546Sopenharmony_ci tex->sampler_index = 0; 591bf215546Sopenharmony_ci 592bf215546Sopenharmony_ci b->shader->info.num_textures = 1; 593bf215546Sopenharmony_ci BITSET_SET(b->shader->info.textures_used, 0); 594bf215546Sopenharmony_ci BITSET_SET(b->shader->info.textures_used_by_txf, 0); 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci nir_ssa_def *coord = nir_f2i32(b, nir_load_var(b, in_coords)); 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci tex->src[0].src_type = nir_tex_src_coord; 599bf215546Sopenharmony_ci tex->src[0].src = nir_src_for_ssa(coord); 600bf215546Sopenharmony_ci tex->coord_components = 2; 601bf215546Sopenharmony_ci 602bf215546Sopenharmony_ci tex->src[1].src_type = nir_tex_src_ms_index; 603bf215546Sopenharmony_ci tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(b)); 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, NULL); 606bf215546Sopenharmony_ci nir_builder_instr_insert(b, &tex->instr); 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci nir_store_var(b, out_color, &tex->dest.ssa, 0xf); 609bf215546Sopenharmony_ci 610bf215546Sopenharmony_ci return b->shader; 611bf215546Sopenharmony_ci} 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_cistatic nir_shader * 614bf215546Sopenharmony_cibuild_clear_fs_shader(unsigned mrts) 615bf215546Sopenharmony_ci{ 616bf215546Sopenharmony_ci nir_builder _b = 617bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, 618bf215546Sopenharmony_ci "mrt%u clear fs", mrts); 619bf215546Sopenharmony_ci nir_builder *b = &_b; 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci for (unsigned i = 0; i < mrts; i++) { 622bf215546Sopenharmony_ci nir_variable *out_color = 623bf215546Sopenharmony_ci nir_variable_create(b->shader, nir_var_shader_out, glsl_vec4_type(), 624bf215546Sopenharmony_ci "color"); 625bf215546Sopenharmony_ci out_color->data.location = FRAG_RESULT_DATA0 + i; 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci nir_ssa_def *color = load_const(b, 4 * i, 4); 628bf215546Sopenharmony_ci nir_store_var(b, out_color, color, 0xf); 629bf215546Sopenharmony_ci } 630bf215546Sopenharmony_ci 631bf215546Sopenharmony_ci return b->shader; 632bf215546Sopenharmony_ci} 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_cistatic void 635bf215546Sopenharmony_cicompile_shader(struct tu_device *dev, struct nir_shader *nir, 636bf215546Sopenharmony_ci unsigned consts, unsigned *offset, enum global_shader idx) 637bf215546Sopenharmony_ci{ 638bf215546Sopenharmony_ci nir->options = ir3_get_compiler_options(dev->compiler); 639bf215546Sopenharmony_ci 640bf215546Sopenharmony_ci nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage); 641bf215546Sopenharmony_ci nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage); 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci ir3_finalize_nir(dev->compiler, nir); 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci struct ir3_shader *sh = 646bf215546Sopenharmony_ci ir3_shader_from_nir(dev->compiler, nir, &(struct ir3_shader_options) { 647bf215546Sopenharmony_ci .api_wavesize = IR3_SINGLE_OR_DOUBLE, 648bf215546Sopenharmony_ci .real_wavesize = IR3_SINGLE_OR_DOUBLE, 649bf215546Sopenharmony_ci .reserved_user_consts = align(consts, 4), 650bf215546Sopenharmony_ci }, NULL); 651bf215546Sopenharmony_ci 652bf215546Sopenharmony_ci struct ir3_shader_key key = {}; 653bf215546Sopenharmony_ci bool created; 654bf215546Sopenharmony_ci struct ir3_shader_variant *so = 655bf215546Sopenharmony_ci ir3_shader_get_variant(sh, &key, false, false, &created); 656bf215546Sopenharmony_ci 657bf215546Sopenharmony_ci struct tu6_global *global = dev->global_bo->map; 658bf215546Sopenharmony_ci 659bf215546Sopenharmony_ci assert(*offset + so->info.sizedwords <= ARRAY_SIZE(global->shaders)); 660bf215546Sopenharmony_ci dev->global_shaders[idx] = sh; 661bf215546Sopenharmony_ci dev->global_shader_variants[idx] = so; 662bf215546Sopenharmony_ci memcpy(&global->shaders[*offset], so->bin, 663bf215546Sopenharmony_ci sizeof(uint32_t) * so->info.sizedwords); 664bf215546Sopenharmony_ci dev->global_shader_va[idx] = dev->global_bo->iova + 665bf215546Sopenharmony_ci gb_offset(shaders[*offset]); 666bf215546Sopenharmony_ci *offset += align(so->info.sizedwords, 32); 667bf215546Sopenharmony_ci} 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_civoid 670bf215546Sopenharmony_citu_init_clear_blit_shaders(struct tu_device *dev) 671bf215546Sopenharmony_ci{ 672bf215546Sopenharmony_ci unsigned offset = 0; 673bf215546Sopenharmony_ci compile_shader(dev, build_blit_vs_shader(), 3, &offset, GLOBAL_SH_VS_BLIT); 674bf215546Sopenharmony_ci compile_shader(dev, build_clear_vs_shader(), 2, &offset, GLOBAL_SH_VS_CLEAR); 675bf215546Sopenharmony_ci compile_shader(dev, build_blit_fs_shader(false), 0, &offset, GLOBAL_SH_FS_BLIT); 676bf215546Sopenharmony_ci compile_shader(dev, build_blit_fs_shader(true), 0, &offset, GLOBAL_SH_FS_BLIT_ZSCALE); 677bf215546Sopenharmony_ci compile_shader(dev, build_ms_copy_fs_shader(), 0, &offset, GLOBAL_SH_FS_COPY_MS); 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci for (uint32_t num_rts = 0; num_rts <= MAX_RTS; num_rts++) { 680bf215546Sopenharmony_ci compile_shader(dev, build_clear_fs_shader(num_rts), num_rts, &offset, 681bf215546Sopenharmony_ci GLOBAL_SH_FS_CLEAR0 + num_rts); 682bf215546Sopenharmony_ci } 683bf215546Sopenharmony_ci} 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_civoid 686bf215546Sopenharmony_citu_destroy_clear_blit_shaders(struct tu_device *dev) 687bf215546Sopenharmony_ci{ 688bf215546Sopenharmony_ci for (unsigned i = 0; i < GLOBAL_SH_COUNT; i++) { 689bf215546Sopenharmony_ci if (dev->global_shaders[i]) 690bf215546Sopenharmony_ci ir3_shader_destroy(dev->global_shaders[i]); 691bf215546Sopenharmony_ci } 692bf215546Sopenharmony_ci} 693bf215546Sopenharmony_ci 694bf215546Sopenharmony_cistatic void 695bf215546Sopenharmony_cir3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, 696bf215546Sopenharmony_ci uint32_t rts_mask, bool z_scale, VkSampleCountFlagBits samples) 697bf215546Sopenharmony_ci{ 698bf215546Sopenharmony_ci enum global_shader vs_id = 699bf215546Sopenharmony_ci blit ? GLOBAL_SH_VS_BLIT : GLOBAL_SH_VS_CLEAR; 700bf215546Sopenharmony_ci 701bf215546Sopenharmony_ci struct ir3_shader_variant *vs = cmd->device->global_shader_variants[vs_id]; 702bf215546Sopenharmony_ci uint64_t vs_iova = cmd->device->global_shader_va[vs_id]; 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci enum global_shader fs_id = GLOBAL_SH_FS_BLIT; 705bf215546Sopenharmony_ci 706bf215546Sopenharmony_ci if (z_scale) 707bf215546Sopenharmony_ci fs_id = GLOBAL_SH_FS_BLIT_ZSCALE; 708bf215546Sopenharmony_ci else if (samples != VK_SAMPLE_COUNT_1_BIT) 709bf215546Sopenharmony_ci fs_id = GLOBAL_SH_FS_COPY_MS; 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci unsigned num_rts = util_bitcount(rts_mask); 712bf215546Sopenharmony_ci if (!blit) 713bf215546Sopenharmony_ci fs_id = GLOBAL_SH_FS_CLEAR0 + num_rts; 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci struct ir3_shader_variant *fs = cmd->device->global_shader_variants[fs_id]; 716bf215546Sopenharmony_ci uint64_t fs_iova = cmd->device->global_shader_va[fs_id]; 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD( 719bf215546Sopenharmony_ci .vs_state = true, 720bf215546Sopenharmony_ci .hs_state = true, 721bf215546Sopenharmony_ci .ds_state = true, 722bf215546Sopenharmony_ci .gs_state = true, 723bf215546Sopenharmony_ci .fs_state = true, 724bf215546Sopenharmony_ci .cs_state = true, 725bf215546Sopenharmony_ci .gfx_ibo = true, 726bf215546Sopenharmony_ci .cs_ibo = true, 727bf215546Sopenharmony_ci .gfx_shared_const = true, 728bf215546Sopenharmony_ci .gfx_bindless = 0x1f, 729bf215546Sopenharmony_ci .cs_bindless = 0x1f)); 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, vs); 732bf215546Sopenharmony_ci tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL); 733bf215546Sopenharmony_ci tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL); 734bf215546Sopenharmony_ci tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL); 735bf215546Sopenharmony_ci tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, fs); 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_ci struct tu_pvtmem_config pvtmem = {}; 738bf215546Sopenharmony_ci tu6_emit_xs(cs, MESA_SHADER_VERTEX, vs, &pvtmem, vs_iova); 739bf215546Sopenharmony_ci tu6_emit_xs(cs, MESA_SHADER_FRAGMENT, fs, &pvtmem, fs_iova); 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0()); 742bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0()); 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci if (cmd->device->physical_device->info->a6xx.has_cp_reg_write) { 745bf215546Sopenharmony_ci /* Copy what the blob does here. This will emit an extra 0x3f 746bf215546Sopenharmony_ci * CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what 747bf215546Sopenharmony_ci * this is working around yet. 748bf215546Sopenharmony_ci */ 749bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3); 750bf215546Sopenharmony_ci tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE)); 751bf215546Sopenharmony_ci tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL); 752bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 753bf215546Sopenharmony_ci } else { 754bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_PC_MULTIVIEW_CNTL()); 755bf215546Sopenharmony_ci } 756bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_VFD_MULTIVIEW_CNTL()); 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci tu6_emit_vpc(cs, vs, NULL, NULL, NULL, fs, 0); 759bf215546Sopenharmony_ci 760bf215546Sopenharmony_ci /* REPL_MODE for varying with RECTLIST (2 vertices only) */ 761bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0)); 762bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0)); 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci tu6_emit_fs_inputs(cs, fs); 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 767bf215546Sopenharmony_ci A6XX_GRAS_CL_CNTL( 768bf215546Sopenharmony_ci .persp_division_disable = 1, 769bf215546Sopenharmony_ci .vp_xform_disable = 1, 770bf215546Sopenharmony_ci .vp_clip_code_ignore = 1, 771bf215546Sopenharmony_ci .clip_disable = 1)); 772bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable? 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_PC_RASTER_CNTL()); 775bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_VPC_UNKNOWN_9107()); 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 778bf215546Sopenharmony_ci A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = 0, .y = 0), 779bf215546Sopenharmony_ci A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff)); 780bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 781bf215546Sopenharmony_ci A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = 0, .y = 0), 782bf215546Sopenharmony_ci A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff)); 783bf215546Sopenharmony_ci 784bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 785bf215546Sopenharmony_ci A6XX_VFD_INDEX_OFFSET(), 786bf215546Sopenharmony_ci A6XX_VFD_INSTANCE_START_OFFSET()); 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci if (rts_mask) { 789bf215546Sopenharmony_ci unsigned rts_count = util_last_bit(rts_mask); 790bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), rts_count); 791bf215546Sopenharmony_ci unsigned rt = 0; 792bf215546Sopenharmony_ci for (unsigned i = 0; i < rts_count; i++) { 793bf215546Sopenharmony_ci unsigned regid = 0; 794bf215546Sopenharmony_ci if (rts_mask & (1u << i)) 795bf215546Sopenharmony_ci regid = ir3_find_output_regid(fs, FRAG_RESULT_DATA0 + rt++); 796bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(regid)); 797bf215546Sopenharmony_ci } 798bf215546Sopenharmony_ci } 799bf215546Sopenharmony_ci 800bf215546Sopenharmony_ci cmd->state.line_mode = RECTANGULAR; 801bf215546Sopenharmony_ci tu6_emit_msaa(cs, samples, cmd->state.line_mode); 802bf215546Sopenharmony_ci} 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_cistatic void 805bf215546Sopenharmony_cir3d_coords_raw(struct tu_cs *cs, const float *coords) 806bf215546Sopenharmony_ci{ 807bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8); 808bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | 809bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 810bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 811bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) | 812bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(2)); 813bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 814bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); 815bf215546Sopenharmony_ci tu_cs_emit_array(cs, (const uint32_t *) coords, 8); 816bf215546Sopenharmony_ci} 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci/* z coordinate for "z scale" blit path which uses a 3d texture */ 819bf215546Sopenharmony_cistatic void 820bf215546Sopenharmony_cir3d_coord_z(struct tu_cs *cs, float z) 821bf215546Sopenharmony_ci{ 822bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 4); 823bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(2) | 824bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 825bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 826bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) | 827bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(1)); 828bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 829bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); 830bf215546Sopenharmony_ci tu_cs_emit(cs, fui(z)); 831bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 832bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 833bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 834bf215546Sopenharmony_ci} 835bf215546Sopenharmony_ci 836bf215546Sopenharmony_cistatic void 837bf215546Sopenharmony_cir3d_coords(struct tu_cs *cs, 838bf215546Sopenharmony_ci const VkOffset2D *dst, 839bf215546Sopenharmony_ci const VkOffset2D *src, 840bf215546Sopenharmony_ci const VkExtent2D *extent) 841bf215546Sopenharmony_ci{ 842bf215546Sopenharmony_ci int32_t src_x1 = src ? src->x : 0; 843bf215546Sopenharmony_ci int32_t src_y1 = src ? src->y : 0; 844bf215546Sopenharmony_ci r3d_coords_raw(cs, (float[]) { 845bf215546Sopenharmony_ci dst->x, dst->y, 846bf215546Sopenharmony_ci src_x1, src_y1, 847bf215546Sopenharmony_ci dst->x + extent->width, dst->y + extent->height, 848bf215546Sopenharmony_ci src_x1 + extent->width, src_y1 + extent->height, 849bf215546Sopenharmony_ci }); 850bf215546Sopenharmony_ci} 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_cistatic void 853bf215546Sopenharmony_cir3d_clear_value(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val) 854bf215546Sopenharmony_ci{ 855bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4); 856bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | 857bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 858bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 859bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) | 860bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(1)); 861bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 862bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); 863bf215546Sopenharmony_ci switch (format) { 864bf215546Sopenharmony_ci case PIPE_FORMAT_Z24X8_UNORM: 865bf215546Sopenharmony_ci case PIPE_FORMAT_Z24_UNORM_S8_UINT: { 866bf215546Sopenharmony_ci /* cleared as r8g8b8a8_unorm using special format */ 867bf215546Sopenharmony_ci uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24); 868bf215546Sopenharmony_ci tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f)); 869bf215546Sopenharmony_ci tu_cs_emit(cs, fui((tmp >> 8 & 0xff) / 255.0f)); 870bf215546Sopenharmony_ci tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f)); 871bf215546Sopenharmony_ci tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f)); 872bf215546Sopenharmony_ci } break; 873bf215546Sopenharmony_ci case PIPE_FORMAT_Z16_UNORM: 874bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT: 875bf215546Sopenharmony_ci tu_cs_emit(cs, fui(val->depthStencil.depth)); 876bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 877bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 878bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 879bf215546Sopenharmony_ci break; 880bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT: 881bf215546Sopenharmony_ci tu_cs_emit(cs, val->depthStencil.stencil & 0xff); 882bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 883bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 884bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 885bf215546Sopenharmony_ci break; 886bf215546Sopenharmony_ci default: 887bf215546Sopenharmony_ci /* as color formats use clear value as-is */ 888bf215546Sopenharmony_ci assert(!util_format_is_depth_or_stencil(format)); 889bf215546Sopenharmony_ci tu_cs_emit_array(cs, val->color.uint32, 4); 890bf215546Sopenharmony_ci break; 891bf215546Sopenharmony_ci } 892bf215546Sopenharmony_ci} 893bf215546Sopenharmony_ci 894bf215546Sopenharmony_cistatic void 895bf215546Sopenharmony_cir3d_src_common(struct tu_cmd_buffer *cmd, 896bf215546Sopenharmony_ci struct tu_cs *cs, 897bf215546Sopenharmony_ci const uint32_t *tex_const, 898bf215546Sopenharmony_ci uint32_t offset_base, 899bf215546Sopenharmony_ci uint32_t offset_ubwc, 900bf215546Sopenharmony_ci VkFilter filter) 901bf215546Sopenharmony_ci{ 902bf215546Sopenharmony_ci struct tu_cs_memory texture = { }; 903bf215546Sopenharmony_ci VkResult result = tu_cs_alloc(&cmd->sub_cs, 904bf215546Sopenharmony_ci 2, /* allocate space for a sampler too */ 905bf215546Sopenharmony_ci A6XX_TEX_CONST_DWORDS, &texture); 906bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 907bf215546Sopenharmony_ci cmd->record_result = result; 908bf215546Sopenharmony_ci return; 909bf215546Sopenharmony_ci } 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci memcpy(texture.map, tex_const, A6XX_TEX_CONST_DWORDS * 4); 912bf215546Sopenharmony_ci 913bf215546Sopenharmony_ci /* patch addresses for layer offset */ 914bf215546Sopenharmony_ci *(uint64_t*) (texture.map + 4) += offset_base; 915bf215546Sopenharmony_ci uint64_t ubwc_addr = (texture.map[7] | (uint64_t) texture.map[8] << 32) + offset_ubwc; 916bf215546Sopenharmony_ci texture.map[7] = ubwc_addr; 917bf215546Sopenharmony_ci texture.map[8] = ubwc_addr >> 32; 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci texture.map[A6XX_TEX_CONST_DWORDS + 0] = 920bf215546Sopenharmony_ci A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(filter, false)) | 921bf215546Sopenharmony_ci A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(filter, false)) | 922bf215546Sopenharmony_ci A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE) | 923bf215546Sopenharmony_ci A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE) | 924bf215546Sopenharmony_ci A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) | 925bf215546Sopenharmony_ci 0x60000; /* XXX used by blob, doesn't seem necessary */ 926bf215546Sopenharmony_ci texture.map[A6XX_TEX_CONST_DWORDS + 1] = 927bf215546Sopenharmony_ci A6XX_TEX_SAMP_1_UNNORM_COORDS | 928bf215546Sopenharmony_ci A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR; 929bf215546Sopenharmony_ci texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0; 930bf215546Sopenharmony_ci texture.map[A6XX_TEX_CONST_DWORDS + 3] = 0; 931bf215546Sopenharmony_ci 932bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3); 933bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | 934bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) | 935bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | 936bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) | 937bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(1)); 938bf215546Sopenharmony_ci tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4); 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_SAMP(.qword = texture.iova + A6XX_TEX_CONST_DWORDS * 4)); 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3); 943bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | 944bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 945bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) | 946bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) | 947bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(1)); 948bf215546Sopenharmony_ci tu_cs_emit_qw(cs, texture.iova); 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_CONST(.qword = texture.iova)); 951bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(1)); 952bf215546Sopenharmony_ci} 953bf215546Sopenharmony_ci 954bf215546Sopenharmony_cistatic void 955bf215546Sopenharmony_cir3d_src(struct tu_cmd_buffer *cmd, 956bf215546Sopenharmony_ci struct tu_cs *cs, 957bf215546Sopenharmony_ci const struct fdl6_view *iview, 958bf215546Sopenharmony_ci uint32_t layer, 959bf215546Sopenharmony_ci VkFilter filter, 960bf215546Sopenharmony_ci enum pipe_format dst_format) 961bf215546Sopenharmony_ci{ 962bf215546Sopenharmony_ci uint32_t desc[A6XX_TEX_CONST_DWORDS]; 963bf215546Sopenharmony_ci memcpy(desc, iview->descriptor, sizeof(desc)); 964bf215546Sopenharmony_ci 965bf215546Sopenharmony_ci enum a6xx_format fmt = (desc[0] & A6XX_TEX_CONST_0_FMT__MASK) >> 966bf215546Sopenharmony_ci A6XX_TEX_CONST_0_FMT__SHIFT; 967bf215546Sopenharmony_ci enum pipe_format src_format = iview->format; 968bf215546Sopenharmony_ci fixup_src_format(&src_format, dst_format, &fmt); 969bf215546Sopenharmony_ci desc[0] = (desc[0] & ~A6XX_TEX_CONST_0_FMT__MASK) | 970bf215546Sopenharmony_ci A6XX_TEX_CONST_0_FMT(fmt); 971bf215546Sopenharmony_ci 972bf215546Sopenharmony_ci r3d_src_common(cmd, cs, desc, 973bf215546Sopenharmony_ci iview->layer_size * layer, 974bf215546Sopenharmony_ci iview->ubwc_layer_size * layer, 975bf215546Sopenharmony_ci filter); 976bf215546Sopenharmony_ci} 977bf215546Sopenharmony_ci 978bf215546Sopenharmony_cistatic void 979bf215546Sopenharmony_cir3d_src_buffer(struct tu_cmd_buffer *cmd, 980bf215546Sopenharmony_ci struct tu_cs *cs, 981bf215546Sopenharmony_ci enum pipe_format format, 982bf215546Sopenharmony_ci uint64_t va, uint32_t pitch, 983bf215546Sopenharmony_ci uint32_t width, uint32_t height, 984bf215546Sopenharmony_ci enum pipe_format dst_format) 985bf215546Sopenharmony_ci{ 986bf215546Sopenharmony_ci uint32_t desc[A6XX_TEX_CONST_DWORDS]; 987bf215546Sopenharmony_ci 988bf215546Sopenharmony_ci struct tu_native_format fmt = tu6_format_texture(format, TILE6_LINEAR); 989bf215546Sopenharmony_ci enum a6xx_format color_format = fmt.fmt; 990bf215546Sopenharmony_ci fixup_src_format(&format, dst_format, &color_format); 991bf215546Sopenharmony_ci 992bf215546Sopenharmony_ci desc[0] = 993bf215546Sopenharmony_ci COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) | 994bf215546Sopenharmony_ci A6XX_TEX_CONST_0_FMT(color_format) | 995bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWAP(fmt.swap) | 996bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) | 997bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) | 998bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) | 999bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W); 1000bf215546Sopenharmony_ci desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height); 1001bf215546Sopenharmony_ci desc[2] = 1002bf215546Sopenharmony_ci A6XX_TEX_CONST_2_PITCH(pitch) | 1003bf215546Sopenharmony_ci A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D); 1004bf215546Sopenharmony_ci desc[3] = 0; 1005bf215546Sopenharmony_ci desc[4] = va; 1006bf215546Sopenharmony_ci desc[5] = va >> 32; 1007bf215546Sopenharmony_ci for (uint32_t i = 6; i < A6XX_TEX_CONST_DWORDS; i++) 1008bf215546Sopenharmony_ci desc[i] = 0; 1009bf215546Sopenharmony_ci 1010bf215546Sopenharmony_ci r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); 1011bf215546Sopenharmony_ci} 1012bf215546Sopenharmony_ci 1013bf215546Sopenharmony_cistatic void 1014bf215546Sopenharmony_cir3d_src_gmem(struct tu_cmd_buffer *cmd, 1015bf215546Sopenharmony_ci struct tu_cs *cs, 1016bf215546Sopenharmony_ci const struct tu_image_view *iview, 1017bf215546Sopenharmony_ci enum pipe_format format, 1018bf215546Sopenharmony_ci enum pipe_format dst_format, 1019bf215546Sopenharmony_ci uint32_t gmem_offset, 1020bf215546Sopenharmony_ci uint32_t cpp) 1021bf215546Sopenharmony_ci{ 1022bf215546Sopenharmony_ci uint32_t desc[A6XX_TEX_CONST_DWORDS]; 1023bf215546Sopenharmony_ci memcpy(desc, iview->view.descriptor, sizeof(desc)); 1024bf215546Sopenharmony_ci 1025bf215546Sopenharmony_ci enum a6xx_format fmt = tu6_format_texture(format, TILE6_LINEAR).fmt; 1026bf215546Sopenharmony_ci fixup_src_format(&format, dst_format, &fmt); 1027bf215546Sopenharmony_ci 1028bf215546Sopenharmony_ci /* patch the format so that depth/stencil get the right format and swizzle */ 1029bf215546Sopenharmony_ci desc[0] &= ~(A6XX_TEX_CONST_0_FMT__MASK | 1030bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_X__MASK | A6XX_TEX_CONST_0_SWIZ_Y__MASK | 1031bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_Z__MASK | A6XX_TEX_CONST_0_SWIZ_W__MASK); 1032bf215546Sopenharmony_ci desc[0] |= A6XX_TEX_CONST_0_FMT(fmt) | 1033bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) | 1034bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_Y(A6XX_TEX_Y) | 1035bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_Z(A6XX_TEX_Z) | 1036bf215546Sopenharmony_ci A6XX_TEX_CONST_0_SWIZ_W(A6XX_TEX_W); 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_ci /* patched for gmem */ 1039bf215546Sopenharmony_ci desc[0] &= ~(A6XX_TEX_CONST_0_SWAP__MASK | A6XX_TEX_CONST_0_TILE_MODE__MASK); 1040bf215546Sopenharmony_ci desc[0] |= A6XX_TEX_CONST_0_TILE_MODE(TILE6_2); 1041bf215546Sopenharmony_ci desc[2] = 1042bf215546Sopenharmony_ci A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D) | 1043bf215546Sopenharmony_ci A6XX_TEX_CONST_2_PITCH(cmd->state.tiling->tile0.width * cpp); 1044bf215546Sopenharmony_ci desc[3] = 0; 1045bf215546Sopenharmony_ci desc[4] = cmd->device->physical_device->gmem_base + gmem_offset; 1046bf215546Sopenharmony_ci desc[5] = A6XX_TEX_CONST_5_DEPTH(1); 1047bf215546Sopenharmony_ci for (unsigned i = 6; i < A6XX_TEX_CONST_DWORDS; i++) 1048bf215546Sopenharmony_ci desc[i] = 0; 1049bf215546Sopenharmony_ci 1050bf215546Sopenharmony_ci r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST); 1051bf215546Sopenharmony_ci} 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_cistatic void 1054bf215546Sopenharmony_cir3d_dst(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, 1055bf215546Sopenharmony_ci enum pipe_format src_format) 1056bf215546Sopenharmony_ci{ 1057bf215546Sopenharmony_ci uint32_t mrt_buf_info = iview->RB_MRT_BUF_INFO; 1058bf215546Sopenharmony_ci 1059bf215546Sopenharmony_ci enum a6xx_format fmt = mrt_buf_info & A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK; 1060bf215546Sopenharmony_ci enum pipe_format dst_format = iview->format; 1061bf215546Sopenharmony_ci fixup_dst_format(src_format, &dst_format, &fmt); 1062bf215546Sopenharmony_ci mrt_buf_info = 1063bf215546Sopenharmony_ci (mrt_buf_info & ~A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT__MASK) | 1064bf215546Sopenharmony_ci A6XX_RB_MRT_BUF_INFO_COLOR_FORMAT(fmt); 1065bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); 1066bf215546Sopenharmony_ci tu_cs_emit(cs, mrt_buf_info); 1067bf215546Sopenharmony_ci tu_cs_image_ref(cs, iview, layer); 1068bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 1069bf215546Sopenharmony_ci 1070bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3); 1071bf215546Sopenharmony_ci tu_cs_image_flag_ref(cs, iview, layer); 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci /* Use color format from RB_MRT_BUF_INFO. This register is relevant for 1074bf215546Sopenharmony_ci * FMT6_NV12_Y. 1075bf215546Sopenharmony_ci */ 1076bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = fmt)); 1077bf215546Sopenharmony_ci 1078bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled)); 1079bf215546Sopenharmony_ci} 1080bf215546Sopenharmony_ci 1081bf215546Sopenharmony_cistatic void 1082bf215546Sopenharmony_cir3d_dst_depth(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) 1083bf215546Sopenharmony_ci{ 1084bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); 1085bf215546Sopenharmony_ci tu_cs_emit(cs, tu_image_view_depth(iview, RB_MRT_BUF_INFO)); 1086bf215546Sopenharmony_ci tu_cs_image_depth_ref(cs, iview, layer); 1087bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 1088bf215546Sopenharmony_ci 1089bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3); 1090bf215546Sopenharmony_ci tu_cs_image_flag_ref(cs, &iview->view, layer); 1091bf215546Sopenharmony_ci 1092bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->view.ubwc_enabled)); 1093bf215546Sopenharmony_ci} 1094bf215546Sopenharmony_ci 1095bf215546Sopenharmony_cistatic void 1096bf215546Sopenharmony_cir3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer) 1097bf215546Sopenharmony_ci{ 1098bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6); 1099bf215546Sopenharmony_ci tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO)); 1100bf215546Sopenharmony_ci tu_cs_image_stencil_ref(cs, iview, layer); 1101bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); 1104bf215546Sopenharmony_ci} 1105bf215546Sopenharmony_ci 1106bf215546Sopenharmony_cistatic void 1107bf215546Sopenharmony_cir3d_dst_buffer(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch, 1108bf215546Sopenharmony_ci enum pipe_format src_format) 1109bf215546Sopenharmony_ci{ 1110bf215546Sopenharmony_ci struct tu_native_format fmt = tu6_format_color(format, TILE6_LINEAR); 1111bf215546Sopenharmony_ci 1112bf215546Sopenharmony_ci enum a6xx_format color_fmt = fmt.fmt; 1113bf215546Sopenharmony_ci fixup_dst_format(src_format, &format, &color_fmt); 1114bf215546Sopenharmony_ci 1115bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 1116bf215546Sopenharmony_ci A6XX_RB_MRT_BUF_INFO(0, .color_format = color_fmt, .color_swap = fmt.swap), 1117bf215546Sopenharmony_ci A6XX_RB_MRT_PITCH(0, pitch), 1118bf215546Sopenharmony_ci A6XX_RB_MRT_ARRAY_PITCH(0, 0), 1119bf215546Sopenharmony_ci A6XX_RB_MRT_BASE(0, .qword = va), 1120bf215546Sopenharmony_ci A6XX_RB_MRT_BASE_GMEM(0, 0)); 1121bf215546Sopenharmony_ci 1122bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL()); 1123bf215546Sopenharmony_ci} 1124bf215546Sopenharmony_ci 1125bf215546Sopenharmony_cistatic uint8_t 1126bf215546Sopenharmony_ciaspect_write_mask(enum pipe_format format, VkImageAspectFlags aspect_mask) 1127bf215546Sopenharmony_ci{ 1128bf215546Sopenharmony_ci uint8_t mask = 0xf; 1129bf215546Sopenharmony_ci assert(aspect_mask); 1130bf215546Sopenharmony_ci /* note: the only format with partial writing is D24S8, 1131bf215546Sopenharmony_ci * clear/blit uses the _AS_R8G8B8A8 format to access it 1132bf215546Sopenharmony_ci */ 1133bf215546Sopenharmony_ci if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) { 1134bf215546Sopenharmony_ci if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) 1135bf215546Sopenharmony_ci mask = 0x7; 1136bf215546Sopenharmony_ci if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) 1137bf215546Sopenharmony_ci mask = 0x8; 1138bf215546Sopenharmony_ci } 1139bf215546Sopenharmony_ci return mask; 1140bf215546Sopenharmony_ci} 1141bf215546Sopenharmony_ci 1142bf215546Sopenharmony_cistatic void 1143bf215546Sopenharmony_cir3d_setup(struct tu_cmd_buffer *cmd, 1144bf215546Sopenharmony_ci struct tu_cs *cs, 1145bf215546Sopenharmony_ci enum pipe_format src_format, 1146bf215546Sopenharmony_ci enum pipe_format dst_format, 1147bf215546Sopenharmony_ci VkImageAspectFlags aspect_mask, 1148bf215546Sopenharmony_ci unsigned blit_param, 1149bf215546Sopenharmony_ci bool clear, 1150bf215546Sopenharmony_ci bool ubwc, 1151bf215546Sopenharmony_ci VkSampleCountFlagBits samples) 1152bf215546Sopenharmony_ci{ 1153bf215546Sopenharmony_ci enum a6xx_format fmt = tu6_base_format(dst_format); 1154bf215546Sopenharmony_ci fixup_dst_format(src_format, &dst_format, &fmt); 1155bf215546Sopenharmony_ci 1156bf215546Sopenharmony_ci if ((dst_format == PIPE_FORMAT_Z24_UNORM_S8_UINT || 1157bf215546Sopenharmony_ci dst_format == PIPE_FORMAT_Z24X8_UNORM) && ubwc) { 1158bf215546Sopenharmony_ci fmt = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8; 1159bf215546Sopenharmony_ci } 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_ci if (!cmd->state.pass) { 1162bf215546Sopenharmony_ci tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM); 1163bf215546Sopenharmony_ci tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff); 1164bf215546Sopenharmony_ci } 1165bf215546Sopenharmony_ci 1166bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000)); 1167bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000)); 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_ci r3d_common(cmd, cs, !clear, 1, blit_param, samples); 1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); 1172bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) | 1173bf215546Sopenharmony_ci A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) | 1174bf215546Sopenharmony_ci 0xfc000000); 1175bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1)); 1176bf215546Sopenharmony_ci 1177bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 1178bf215546Sopenharmony_ci A6XX_RB_FS_OUTPUT_CNTL0(), 1179bf215546Sopenharmony_ci A6XX_RB_FS_OUTPUT_CNTL1(.mrt = 1)); 1180bf215546Sopenharmony_ci 1181bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL()); 1182bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.sample_mask = 0xffff)); 1183bf215546Sopenharmony_ci 1184bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL()); 1185bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL()); 1186bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL()); 1187bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL()); 1188bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK()); 1189bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK()); 1190bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_STENCILREF()); 1191bf215546Sopenharmony_ci 1192bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_RENDER_COMPONENTS(.rt0 = 0xf)); 1193bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf)); 1194bf215546Sopenharmony_ci 1195bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0, 1196bf215546Sopenharmony_ci .color_format = fmt, 1197bf215546Sopenharmony_ci .color_sint = util_format_is_pure_sint(dst_format), 1198bf215546Sopenharmony_ci .color_uint = util_format_is_pure_uint(dst_format))); 1199bf215546Sopenharmony_ci 1200bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0, 1201bf215546Sopenharmony_ci .component_enable = aspect_write_mask(dst_format, aspect_mask))); 1202bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(util_format_is_srgb(dst_format))); 1203bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(util_format_is_srgb(dst_format))); 1204bf215546Sopenharmony_ci 1205bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0)); 1206bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0)); 1207bf215546Sopenharmony_ci 1208bf215546Sopenharmony_ci tu_cs_emit_write_reg(cs, REG_A6XX_GRAS_SC_CNTL, 1209bf215546Sopenharmony_ci A6XX_GRAS_SC_CNTL_CCUSINGLECACHELINESIZE(2)); 1210bf215546Sopenharmony_ci 1211bf215546Sopenharmony_ci /* Disable sample counting in order to not affect occlusion query. */ 1212bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = true)); 1213bf215546Sopenharmony_ci 1214bf215546Sopenharmony_ci if (cmd->state.prim_generated_query_running_before_rp) { 1215bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, STOP_PRIMITIVE_CTRS); 1216bf215546Sopenharmony_ci } 1217bf215546Sopenharmony_ci 1218bf215546Sopenharmony_ci if (cmd->state.predication_active) { 1219bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1); 1220bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 1221bf215546Sopenharmony_ci } 1222bf215546Sopenharmony_ci} 1223bf215546Sopenharmony_ci 1224bf215546Sopenharmony_cistatic void 1225bf215546Sopenharmony_cir3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs) 1226bf215546Sopenharmony_ci{ 1227bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3); 1228bf215546Sopenharmony_ci tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) | 1229bf215546Sopenharmony_ci CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) | 1230bf215546Sopenharmony_ci CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY)); 1231bf215546Sopenharmony_ci tu_cs_emit(cs, 1); /* instance count */ 1232bf215546Sopenharmony_ci tu_cs_emit(cs, 2); /* vertex count */ 1233bf215546Sopenharmony_ci} 1234bf215546Sopenharmony_ci 1235bf215546Sopenharmony_cistatic void 1236bf215546Sopenharmony_cir3d_run_vis(struct tu_cmd_buffer *cmd, struct tu_cs *cs) 1237bf215546Sopenharmony_ci{ 1238bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3); 1239bf215546Sopenharmony_ci tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) | 1240bf215546Sopenharmony_ci CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) | 1241bf215546Sopenharmony_ci CP_DRAW_INDX_OFFSET_0_VIS_CULL(USE_VISIBILITY)); 1242bf215546Sopenharmony_ci tu_cs_emit(cs, 1); /* instance count */ 1243bf215546Sopenharmony_ci tu_cs_emit(cs, 2); /* vertex count */ 1244bf215546Sopenharmony_ci} 1245bf215546Sopenharmony_ci 1246bf215546Sopenharmony_cistatic void 1247bf215546Sopenharmony_cir3d_teardown(struct tu_cmd_buffer *cmd, struct tu_cs *cs) 1248bf215546Sopenharmony_ci{ 1249bf215546Sopenharmony_ci if (cmd->state.predication_active) { 1250bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1); 1251bf215546Sopenharmony_ci tu_cs_emit(cs, 1); 1252bf215546Sopenharmony_ci } 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci /* Re-enable sample counting. */ 1255bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = false)); 1256bf215546Sopenharmony_ci 1257bf215546Sopenharmony_ci if (cmd->state.prim_generated_query_running_before_rp) { 1258bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, START_PRIMITIVE_CTRS); 1259bf215546Sopenharmony_ci } 1260bf215546Sopenharmony_ci} 1261bf215546Sopenharmony_ci 1262bf215546Sopenharmony_ci/* blit ops - common interface for 2d/shader paths */ 1263bf215546Sopenharmony_ci 1264bf215546Sopenharmony_cistruct blit_ops { 1265bf215546Sopenharmony_ci void (*coords)(struct tu_cs *cs, 1266bf215546Sopenharmony_ci const VkOffset2D *dst, 1267bf215546Sopenharmony_ci const VkOffset2D *src, 1268bf215546Sopenharmony_ci const VkExtent2D *extent); 1269bf215546Sopenharmony_ci void (*clear_value)(struct tu_cs *cs, enum pipe_format format, const VkClearValue *val); 1270bf215546Sopenharmony_ci void (*src)( 1271bf215546Sopenharmony_ci struct tu_cmd_buffer *cmd, 1272bf215546Sopenharmony_ci struct tu_cs *cs, 1273bf215546Sopenharmony_ci const struct fdl6_view *iview, 1274bf215546Sopenharmony_ci uint32_t layer, 1275bf215546Sopenharmony_ci VkFilter filter, 1276bf215546Sopenharmony_ci enum pipe_format dst_format); 1277bf215546Sopenharmony_ci void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs, 1278bf215546Sopenharmony_ci enum pipe_format format, 1279bf215546Sopenharmony_ci uint64_t va, uint32_t pitch, 1280bf215546Sopenharmony_ci uint32_t width, uint32_t height, 1281bf215546Sopenharmony_ci enum pipe_format dst_format); 1282bf215546Sopenharmony_ci void (*dst)(struct tu_cs *cs, const struct fdl6_view *iview, uint32_t layer, 1283bf215546Sopenharmony_ci enum pipe_format src_format); 1284bf215546Sopenharmony_ci void (*dst_depth)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); 1285bf215546Sopenharmony_ci void (*dst_stencil)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer); 1286bf215546Sopenharmony_ci void (*dst_buffer)(struct tu_cs *cs, enum pipe_format format, uint64_t va, uint32_t pitch, 1287bf215546Sopenharmony_ci enum pipe_format src_format); 1288bf215546Sopenharmony_ci void (*setup)(struct tu_cmd_buffer *cmd, 1289bf215546Sopenharmony_ci struct tu_cs *cs, 1290bf215546Sopenharmony_ci enum pipe_format src_format, 1291bf215546Sopenharmony_ci enum pipe_format dst_format, 1292bf215546Sopenharmony_ci VkImageAspectFlags aspect_mask, 1293bf215546Sopenharmony_ci unsigned blit_param, /* CmdBlitImage: rotation in 2D path and z scaling in 3D path */ 1294bf215546Sopenharmony_ci bool clear, 1295bf215546Sopenharmony_ci bool ubwc, 1296bf215546Sopenharmony_ci VkSampleCountFlagBits samples); 1297bf215546Sopenharmony_ci void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs); 1298bf215546Sopenharmony_ci void (*teardown)(struct tu_cmd_buffer *cmd, 1299bf215546Sopenharmony_ci struct tu_cs *cs); 1300bf215546Sopenharmony_ci}; 1301bf215546Sopenharmony_ci 1302bf215546Sopenharmony_cistatic const struct blit_ops r2d_ops = { 1303bf215546Sopenharmony_ci .coords = r2d_coords, 1304bf215546Sopenharmony_ci .clear_value = r2d_clear_value, 1305bf215546Sopenharmony_ci .src = r2d_src, 1306bf215546Sopenharmony_ci .src_buffer = r2d_src_buffer, 1307bf215546Sopenharmony_ci .dst = r2d_dst, 1308bf215546Sopenharmony_ci .dst_depth = r2d_dst_depth, 1309bf215546Sopenharmony_ci .dst_stencil = r2d_dst_stencil, 1310bf215546Sopenharmony_ci .dst_buffer = r2d_dst_buffer, 1311bf215546Sopenharmony_ci .setup = r2d_setup, 1312bf215546Sopenharmony_ci .run = r2d_run, 1313bf215546Sopenharmony_ci .teardown = r2d_teardown, 1314bf215546Sopenharmony_ci}; 1315bf215546Sopenharmony_ci 1316bf215546Sopenharmony_cistatic const struct blit_ops r3d_ops = { 1317bf215546Sopenharmony_ci .coords = r3d_coords, 1318bf215546Sopenharmony_ci .clear_value = r3d_clear_value, 1319bf215546Sopenharmony_ci .src = r3d_src, 1320bf215546Sopenharmony_ci .src_buffer = r3d_src_buffer, 1321bf215546Sopenharmony_ci .dst = r3d_dst, 1322bf215546Sopenharmony_ci .dst_depth = r3d_dst_depth, 1323bf215546Sopenharmony_ci .dst_stencil = r3d_dst_stencil, 1324bf215546Sopenharmony_ci .dst_buffer = r3d_dst_buffer, 1325bf215546Sopenharmony_ci .setup = r3d_setup, 1326bf215546Sopenharmony_ci .run = r3d_run, 1327bf215546Sopenharmony_ci .teardown = r3d_teardown, 1328bf215546Sopenharmony_ci}; 1329bf215546Sopenharmony_ci 1330bf215546Sopenharmony_ci/* passthrough set coords from 3D extents */ 1331bf215546Sopenharmony_cistatic void 1332bf215546Sopenharmony_cicoords(const struct blit_ops *ops, 1333bf215546Sopenharmony_ci struct tu_cs *cs, 1334bf215546Sopenharmony_ci const VkOffset3D *dst, 1335bf215546Sopenharmony_ci const VkOffset3D *src, 1336bf215546Sopenharmony_ci const VkExtent3D *extent) 1337bf215546Sopenharmony_ci{ 1338bf215546Sopenharmony_ci ops->coords(cs, (const VkOffset2D*) dst, (const VkOffset2D*) src, (const VkExtent2D*) extent); 1339bf215546Sopenharmony_ci} 1340bf215546Sopenharmony_ci 1341bf215546Sopenharmony_ci/* Decides the VK format to treat our data as for a memcpy-style blit. We have 1342bf215546Sopenharmony_ci * to be a bit careful because we have to pick a format with matching UBWC 1343bf215546Sopenharmony_ci * compression behavior, so no just returning R8_UINT/R16_UINT/R32_UINT for 1344bf215546Sopenharmony_ci * everything. 1345bf215546Sopenharmony_ci */ 1346bf215546Sopenharmony_cistatic enum pipe_format 1347bf215546Sopenharmony_cicopy_format(VkFormat vk_format, VkImageAspectFlags aspect_mask) 1348bf215546Sopenharmony_ci{ 1349bf215546Sopenharmony_ci if (vk_format_is_compressed(vk_format)) { 1350bf215546Sopenharmony_ci switch (vk_format_get_blocksize(vk_format)) { 1351bf215546Sopenharmony_ci case 1: return PIPE_FORMAT_R8_UINT; 1352bf215546Sopenharmony_ci case 2: return PIPE_FORMAT_R16_UINT; 1353bf215546Sopenharmony_ci case 4: return PIPE_FORMAT_R32_UINT; 1354bf215546Sopenharmony_ci case 8: return PIPE_FORMAT_R32G32_UINT; 1355bf215546Sopenharmony_ci case 16:return PIPE_FORMAT_R32G32B32A32_UINT; 1356bf215546Sopenharmony_ci default: 1357bf215546Sopenharmony_ci unreachable("unhandled format size"); 1358bf215546Sopenharmony_ci } 1359bf215546Sopenharmony_ci } 1360bf215546Sopenharmony_ci 1361bf215546Sopenharmony_ci enum pipe_format format = tu_vk_format_to_pipe_format(vk_format); 1362bf215546Sopenharmony_ci 1363bf215546Sopenharmony_ci /* For SNORM formats, copy them as the equivalent UNORM format. If we treat 1364bf215546Sopenharmony_ci * them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81 1365bf215546Sopenharmony_ci * (also -1.0), when we're supposed to be memcpying the bits. See 1366bf215546Sopenharmony_ci * https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion. 1367bf215546Sopenharmony_ci */ 1368bf215546Sopenharmony_ci format = util_format_snorm_to_unorm(format); 1369bf215546Sopenharmony_ci 1370bf215546Sopenharmony_ci switch (format) { 1371bf215546Sopenharmony_ci case PIPE_FORMAT_R9G9B9E5_FLOAT: 1372bf215546Sopenharmony_ci return PIPE_FORMAT_R32_UINT; 1373bf215546Sopenharmony_ci 1374bf215546Sopenharmony_ci case PIPE_FORMAT_G8_B8R8_420_UNORM: 1375bf215546Sopenharmony_ci if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT) 1376bf215546Sopenharmony_ci return PIPE_FORMAT_R8G8_UNORM; 1377bf215546Sopenharmony_ci else 1378bf215546Sopenharmony_ci return PIPE_FORMAT_Y8_UNORM; 1379bf215546Sopenharmony_ci case PIPE_FORMAT_G8_B8_R8_420_UNORM: 1380bf215546Sopenharmony_ci return PIPE_FORMAT_R8_UNORM; 1381bf215546Sopenharmony_ci 1382bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 1383bf215546Sopenharmony_ci if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) 1384bf215546Sopenharmony_ci return PIPE_FORMAT_S8_UINT; 1385bf215546Sopenharmony_ci assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT); 1386bf215546Sopenharmony_ci return PIPE_FORMAT_Z32_FLOAT; 1387bf215546Sopenharmony_ci 1388bf215546Sopenharmony_ci default: 1389bf215546Sopenharmony_ci return format; 1390bf215546Sopenharmony_ci } 1391bf215546Sopenharmony_ci} 1392bf215546Sopenharmony_ci 1393bf215546Sopenharmony_civoid 1394bf215546Sopenharmony_citu6_clear_lrz(struct tu_cmd_buffer *cmd, 1395bf215546Sopenharmony_ci struct tu_cs *cs, 1396bf215546Sopenharmony_ci struct tu_image *image, 1397bf215546Sopenharmony_ci const VkClearValue *value) 1398bf215546Sopenharmony_ci{ 1399bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 1400bf215546Sopenharmony_ci 1401bf215546Sopenharmony_ci /* It is assumed that LRZ cache is invalidated at this point for 1402bf215546Sopenharmony_ci * the writes here to become visible to LRZ. 1403bf215546Sopenharmony_ci * 1404bf215546Sopenharmony_ci * LRZ writes are going through UCHE cache, flush UCHE before changing 1405bf215546Sopenharmony_ci * LRZ via CCU. Don't need to invalidate CCU since we are presumably 1406bf215546Sopenharmony_ci * writing whole cache lines we assume to be 64 bytes. 1407bf215546Sopenharmony_ci */ 1408bf215546Sopenharmony_ci tu6_emit_event_write(cmd, &cmd->cs, CACHE_FLUSH_TS); 1409bf215546Sopenharmony_ci 1410bf215546Sopenharmony_ci ops->setup(cmd, cs, PIPE_FORMAT_Z16_UNORM, PIPE_FORMAT_Z16_UNORM, 1411bf215546Sopenharmony_ci VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false, 1412bf215546Sopenharmony_ci VK_SAMPLE_COUNT_1_BIT); 1413bf215546Sopenharmony_ci ops->clear_value(cs, PIPE_FORMAT_Z16_UNORM, value); 1414bf215546Sopenharmony_ci ops->dst_buffer(cs, PIPE_FORMAT_Z16_UNORM, 1415bf215546Sopenharmony_ci image->iova + image->lrz_offset, 1416bf215546Sopenharmony_ci image->lrz_pitch * 2, PIPE_FORMAT_Z16_UNORM); 1417bf215546Sopenharmony_ci ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {image->lrz_pitch, image->lrz_height}); 1418bf215546Sopenharmony_ci ops->run(cmd, cs); 1419bf215546Sopenharmony_ci ops->teardown(cmd, cs); 1420bf215546Sopenharmony_ci 1421bf215546Sopenharmony_ci /* Clearing writes via CCU color in the PS stage, and LRZ is read via 1422bf215546Sopenharmony_ci * UCHE in the earlier GRAS stage. 1423bf215546Sopenharmony_ci */ 1424bf215546Sopenharmony_ci cmd->state.cache.flush_bits |= 1425bf215546Sopenharmony_ci TU_CMD_FLAG_CCU_FLUSH_COLOR | TU_CMD_FLAG_CACHE_INVALIDATE | 1426bf215546Sopenharmony_ci TU_CMD_FLAG_WAIT_FOR_IDLE; 1427bf215546Sopenharmony_ci} 1428bf215546Sopenharmony_ci 1429bf215546Sopenharmony_civoid 1430bf215546Sopenharmony_citu6_dirty_lrz_fc(struct tu_cmd_buffer *cmd, 1431bf215546Sopenharmony_ci struct tu_cs *cs, 1432bf215546Sopenharmony_ci struct tu_image *image) 1433bf215546Sopenharmony_ci{ 1434bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 1435bf215546Sopenharmony_ci VkClearValue clear = { .color = { .uint32[0] = 0xffffffff } }; 1436bf215546Sopenharmony_ci 1437bf215546Sopenharmony_ci /* LRZ fast-clear buffer is always allocated with 512 bytes size. */ 1438bf215546Sopenharmony_ci ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, PIPE_FORMAT_R32_UINT, 1439bf215546Sopenharmony_ci VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false, 1440bf215546Sopenharmony_ci VK_SAMPLE_COUNT_1_BIT); 1441bf215546Sopenharmony_ci ops->clear_value(cs, PIPE_FORMAT_R32_UINT, &clear); 1442bf215546Sopenharmony_ci ops->dst_buffer(cs, PIPE_FORMAT_R32_UINT, 1443bf215546Sopenharmony_ci image->iova + image->lrz_fc_offset, 512, 1444bf215546Sopenharmony_ci PIPE_FORMAT_R32_UINT); 1445bf215546Sopenharmony_ci ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {128, 1}); 1446bf215546Sopenharmony_ci ops->run(cmd, cs); 1447bf215546Sopenharmony_ci ops->teardown(cmd, cs); 1448bf215546Sopenharmony_ci} 1449bf215546Sopenharmony_ci 1450bf215546Sopenharmony_cistatic void 1451bf215546Sopenharmony_citu_image_view_copy_blit(struct fdl6_view *iview, 1452bf215546Sopenharmony_ci struct tu_image *image, 1453bf215546Sopenharmony_ci enum pipe_format format, 1454bf215546Sopenharmony_ci const VkImageSubresourceLayers *subres, 1455bf215546Sopenharmony_ci uint32_t layer, 1456bf215546Sopenharmony_ci bool z_scale) 1457bf215546Sopenharmony_ci{ 1458bf215546Sopenharmony_ci VkImageAspectFlags aspect_mask = subres->aspectMask; 1459bf215546Sopenharmony_ci 1460bf215546Sopenharmony_ci /* always use the AS_R8G8B8A8 format for these */ 1461bf215546Sopenharmony_ci if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT || 1462bf215546Sopenharmony_ci format == PIPE_FORMAT_Z24X8_UNORM) { 1463bf215546Sopenharmony_ci aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT; 1464bf215546Sopenharmony_ci } 1465bf215546Sopenharmony_ci 1466bf215546Sopenharmony_ci const struct fdl_layout *layout = 1467bf215546Sopenharmony_ci &image->layout[tu6_plane_index(image->vk.format, aspect_mask)]; 1468bf215546Sopenharmony_ci 1469bf215546Sopenharmony_ci fdl6_view_init(iview, &layout, &(struct fdl_view_args) { 1470bf215546Sopenharmony_ci .iova = image->iova, 1471bf215546Sopenharmony_ci .base_array_layer = subres->baseArrayLayer + layer, 1472bf215546Sopenharmony_ci .layer_count = 1, 1473bf215546Sopenharmony_ci .base_miplevel = subres->mipLevel, 1474bf215546Sopenharmony_ci .level_count = 1, 1475bf215546Sopenharmony_ci .format = tu_format_for_aspect(format, aspect_mask), 1476bf215546Sopenharmony_ci .swiz = { 1477bf215546Sopenharmony_ci PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W 1478bf215546Sopenharmony_ci }, 1479bf215546Sopenharmony_ci .type = z_scale ? FDL_VIEW_TYPE_3D : FDL_VIEW_TYPE_2D, 1480bf215546Sopenharmony_ci }, false); 1481bf215546Sopenharmony_ci} 1482bf215546Sopenharmony_ci 1483bf215546Sopenharmony_cistatic void 1484bf215546Sopenharmony_citu_image_view_copy(struct fdl6_view *iview, 1485bf215546Sopenharmony_ci struct tu_image *image, 1486bf215546Sopenharmony_ci enum pipe_format format, 1487bf215546Sopenharmony_ci const VkImageSubresourceLayers *subres, 1488bf215546Sopenharmony_ci uint32_t layer) 1489bf215546Sopenharmony_ci{ 1490bf215546Sopenharmony_ci tu_image_view_copy_blit(iview, image, format, subres, layer, false); 1491bf215546Sopenharmony_ci} 1492bf215546Sopenharmony_ci 1493bf215546Sopenharmony_cistatic void 1494bf215546Sopenharmony_citu_image_view_blit(struct fdl6_view *iview, 1495bf215546Sopenharmony_ci struct tu_image *image, 1496bf215546Sopenharmony_ci const VkImageSubresourceLayers *subres, 1497bf215546Sopenharmony_ci uint32_t layer) 1498bf215546Sopenharmony_ci{ 1499bf215546Sopenharmony_ci enum pipe_format format = 1500bf215546Sopenharmony_ci tu6_plane_format(image->vk.format, tu6_plane_index(image->vk.format, 1501bf215546Sopenharmony_ci subres->aspectMask)); 1502bf215546Sopenharmony_ci tu_image_view_copy_blit(iview, image, format, subres, layer, false); 1503bf215546Sopenharmony_ci} 1504bf215546Sopenharmony_ci 1505bf215546Sopenharmony_cistatic void 1506bf215546Sopenharmony_citu6_blit_image(struct tu_cmd_buffer *cmd, 1507bf215546Sopenharmony_ci struct tu_image *src_image, 1508bf215546Sopenharmony_ci struct tu_image *dst_image, 1509bf215546Sopenharmony_ci const VkImageBlit2 *info, 1510bf215546Sopenharmony_ci VkFilter filter) 1511bf215546Sopenharmony_ci{ 1512bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 1513bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->cs; 1514bf215546Sopenharmony_ci bool z_scale = false; 1515bf215546Sopenharmony_ci uint32_t layers = info->dstOffsets[1].z - info->dstOffsets[0].z; 1516bf215546Sopenharmony_ci 1517bf215546Sopenharmony_ci /* 2D blit can't do rotation mirroring from just coordinates */ 1518bf215546Sopenharmony_ci static const enum a6xx_rotation rotate[2][2] = { 1519bf215546Sopenharmony_ci {ROTATE_0, ROTATE_HFLIP}, 1520bf215546Sopenharmony_ci {ROTATE_VFLIP, ROTATE_180}, 1521bf215546Sopenharmony_ci }; 1522bf215546Sopenharmony_ci 1523bf215546Sopenharmony_ci bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) != 1524bf215546Sopenharmony_ci (info->dstOffsets[1].x < info->dstOffsets[0].x); 1525bf215546Sopenharmony_ci bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) != 1526bf215546Sopenharmony_ci (info->dstOffsets[1].y < info->dstOffsets[0].y); 1527bf215546Sopenharmony_ci 1528bf215546Sopenharmony_ci int32_t src0_z = info->srcOffsets[0].z; 1529bf215546Sopenharmony_ci int32_t src1_z = info->srcOffsets[1].z; 1530bf215546Sopenharmony_ci 1531bf215546Sopenharmony_ci if ((info->srcOffsets[1].z - info->srcOffsets[0].z != 1532bf215546Sopenharmony_ci info->dstOffsets[1].z - info->dstOffsets[0].z) || 1533bf215546Sopenharmony_ci info->srcOffsets[1].z < info->srcOffsets[0].z) { 1534bf215546Sopenharmony_ci z_scale = true; 1535bf215546Sopenharmony_ci } 1536bf215546Sopenharmony_ci 1537bf215546Sopenharmony_ci if (info->dstOffsets[1].z < info->dstOffsets[0].z) { 1538bf215546Sopenharmony_ci layers = info->dstOffsets[0].z - info->dstOffsets[1].z; 1539bf215546Sopenharmony_ci src0_z = info->srcOffsets[1].z; 1540bf215546Sopenharmony_ci src1_z = info->srcOffsets[0].z; 1541bf215546Sopenharmony_ci } 1542bf215546Sopenharmony_ci 1543bf215546Sopenharmony_ci if (info->dstSubresource.layerCount > 1) { 1544bf215546Sopenharmony_ci assert(layers <= 1); 1545bf215546Sopenharmony_ci layers = info->dstSubresource.layerCount; 1546bf215546Sopenharmony_ci } 1547bf215546Sopenharmony_ci 1548bf215546Sopenharmony_ci /* BC1_RGB_* formats need to have their last components overriden with 1 1549bf215546Sopenharmony_ci * when sampling, which is normally handled with the texture descriptor 1550bf215546Sopenharmony_ci * swizzle. The 2d path can't handle that, so use the 3d path. 1551bf215546Sopenharmony_ci * 1552bf215546Sopenharmony_ci * TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with 1553bf215546Sopenharmony_ci * the 2d path. 1554bf215546Sopenharmony_ci */ 1555bf215546Sopenharmony_ci 1556bf215546Sopenharmony_ci unsigned blit_param = rotate[mirror_y][mirror_x]; 1557bf215546Sopenharmony_ci if (dst_image->layout[0].nr_samples > 1 || 1558bf215546Sopenharmony_ci src_image->vk.format == VK_FORMAT_BC1_RGB_UNORM_BLOCK || 1559bf215546Sopenharmony_ci src_image->vk.format == VK_FORMAT_BC1_RGB_SRGB_BLOCK || 1560bf215546Sopenharmony_ci filter == VK_FILTER_CUBIC_EXT || 1561bf215546Sopenharmony_ci z_scale) { 1562bf215546Sopenharmony_ci ops = &r3d_ops; 1563bf215546Sopenharmony_ci blit_param = z_scale; 1564bf215546Sopenharmony_ci } 1565bf215546Sopenharmony_ci 1566bf215546Sopenharmony_ci /* use the right format in setup() for D32_S8 1567bf215546Sopenharmony_ci * TODO: this probably should use a helper 1568bf215546Sopenharmony_ci */ 1569bf215546Sopenharmony_ci enum pipe_format src_format = 1570bf215546Sopenharmony_ci tu6_plane_format(src_image->vk.format, 1571bf215546Sopenharmony_ci tu6_plane_index(src_image->vk.format, 1572bf215546Sopenharmony_ci info->srcSubresource.aspectMask)); 1573bf215546Sopenharmony_ci enum pipe_format dst_format = 1574bf215546Sopenharmony_ci tu6_plane_format(dst_image->vk.format, 1575bf215546Sopenharmony_ci tu6_plane_index(src_image->vk.format, 1576bf215546Sopenharmony_ci info->srcSubresource.aspectMask)); 1577bf215546Sopenharmony_ci trace_start_blit(&cmd->trace, cs); 1578bf215546Sopenharmony_ci 1579bf215546Sopenharmony_ci ops->setup(cmd, cs, src_format, dst_format, info->dstSubresource.aspectMask, 1580bf215546Sopenharmony_ci blit_param, false, dst_image->layout[0].ubwc, 1581bf215546Sopenharmony_ci dst_image->layout[0].nr_samples); 1582bf215546Sopenharmony_ci 1583bf215546Sopenharmony_ci if (ops == &r3d_ops) { 1584bf215546Sopenharmony_ci r3d_coords_raw(cs, (float[]) { 1585bf215546Sopenharmony_ci info->dstOffsets[0].x, info->dstOffsets[0].y, 1586bf215546Sopenharmony_ci info->srcOffsets[0].x, info->srcOffsets[0].y, 1587bf215546Sopenharmony_ci info->dstOffsets[1].x, info->dstOffsets[1].y, 1588bf215546Sopenharmony_ci info->srcOffsets[1].x, info->srcOffsets[1].y 1589bf215546Sopenharmony_ci }); 1590bf215546Sopenharmony_ci } else { 1591bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 1592bf215546Sopenharmony_ci A6XX_GRAS_2D_DST_TL(.x = MIN2(info->dstOffsets[0].x, info->dstOffsets[1].x), 1593bf215546Sopenharmony_ci .y = MIN2(info->dstOffsets[0].y, info->dstOffsets[1].y)), 1594bf215546Sopenharmony_ci A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1, 1595bf215546Sopenharmony_ci .y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1)); 1596bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 1597bf215546Sopenharmony_ci A6XX_GRAS_2D_SRC_TL_X(MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)), 1598bf215546Sopenharmony_ci A6XX_GRAS_2D_SRC_BR_X(MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1), 1599bf215546Sopenharmony_ci A6XX_GRAS_2D_SRC_TL_Y(MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)), 1600bf215546Sopenharmony_ci A6XX_GRAS_2D_SRC_BR_Y(MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1)); 1601bf215546Sopenharmony_ci } 1602bf215546Sopenharmony_ci 1603bf215546Sopenharmony_ci struct fdl6_view dst, src; 1604bf215546Sopenharmony_ci tu_image_view_blit(&dst, dst_image, &info->dstSubresource, 1605bf215546Sopenharmony_ci MIN2(info->dstOffsets[0].z, info->dstOffsets[1].z)); 1606bf215546Sopenharmony_ci 1607bf215546Sopenharmony_ci if (z_scale) { 1608bf215546Sopenharmony_ci tu_image_view_copy_blit(&src, src_image, src_format, 1609bf215546Sopenharmony_ci &info->srcSubresource, 0, true); 1610bf215546Sopenharmony_ci ops->src(cmd, cs, &src, 0, filter, dst_format); 1611bf215546Sopenharmony_ci } else { 1612bf215546Sopenharmony_ci tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z); 1613bf215546Sopenharmony_ci } 1614bf215546Sopenharmony_ci 1615bf215546Sopenharmony_ci for (uint32_t i = 0; i < layers; i++) { 1616bf215546Sopenharmony_ci if (z_scale) { 1617bf215546Sopenharmony_ci float t = ((float) i + 0.5f) / (float) layers; 1618bf215546Sopenharmony_ci r3d_coord_z(cs, t * (src1_z - src0_z) + src0_z); 1619bf215546Sopenharmony_ci } else { 1620bf215546Sopenharmony_ci ops->src(cmd, cs, &src, i, filter, dst_format); 1621bf215546Sopenharmony_ci } 1622bf215546Sopenharmony_ci ops->dst(cs, &dst, i, src_format); 1623bf215546Sopenharmony_ci ops->run(cmd, cs); 1624bf215546Sopenharmony_ci } 1625bf215546Sopenharmony_ci 1626bf215546Sopenharmony_ci ops->teardown(cmd, cs); 1627bf215546Sopenharmony_ci 1628bf215546Sopenharmony_ci trace_end_blit(&cmd->trace, cs, 1629bf215546Sopenharmony_ci ops == &r3d_ops, 1630bf215546Sopenharmony_ci src_image->vk.format, 1631bf215546Sopenharmony_ci dst_image->vk.format, 1632bf215546Sopenharmony_ci layers); 1633bf215546Sopenharmony_ci} 1634bf215546Sopenharmony_ci 1635bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1636bf215546Sopenharmony_citu_CmdBlitImage2KHR(VkCommandBuffer commandBuffer, 1637bf215546Sopenharmony_ci const VkBlitImageInfo2* pBlitImageInfo) 1638bf215546Sopenharmony_ci 1639bf215546Sopenharmony_ci{ 1640bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 1641bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, src_image, pBlitImageInfo->srcImage); 1642bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, dst_image, pBlitImageInfo->dstImage); 1643bf215546Sopenharmony_ci 1644bf215546Sopenharmony_ci for (uint32_t i = 0; i < pBlitImageInfo->regionCount; ++i) { 1645bf215546Sopenharmony_ci /* can't blit both depth and stencil at once with D32_S8 1646bf215546Sopenharmony_ci * TODO: more advanced 3D blit path to support it instead? 1647bf215546Sopenharmony_ci */ 1648bf215546Sopenharmony_ci if (src_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT || 1649bf215546Sopenharmony_ci dst_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 1650bf215546Sopenharmony_ci VkImageBlit2 region = pBlitImageInfo->pRegions[i]; 1651bf215546Sopenharmony_ci u_foreach_bit(b, region.dstSubresource.aspectMask) { 1652bf215546Sopenharmony_ci region.srcSubresource.aspectMask = BIT(b); 1653bf215546Sopenharmony_ci region.dstSubresource.aspectMask = BIT(b); 1654bf215546Sopenharmony_ci tu6_blit_image(cmd, src_image, dst_image, ®ion, pBlitImageInfo->filter); 1655bf215546Sopenharmony_ci } 1656bf215546Sopenharmony_ci continue; 1657bf215546Sopenharmony_ci } 1658bf215546Sopenharmony_ci tu6_blit_image(cmd, src_image, dst_image, pBlitImageInfo->pRegions + i, 1659bf215546Sopenharmony_ci pBlitImageInfo->filter); 1660bf215546Sopenharmony_ci } 1661bf215546Sopenharmony_ci 1662bf215546Sopenharmony_ci if (dst_image->lrz_height) { 1663bf215546Sopenharmony_ci tu_disable_lrz(cmd, &cmd->cs, dst_image); 1664bf215546Sopenharmony_ci } 1665bf215546Sopenharmony_ci} 1666bf215546Sopenharmony_ci 1667bf215546Sopenharmony_cistatic void 1668bf215546Sopenharmony_cicopy_compressed(VkFormat format, 1669bf215546Sopenharmony_ci VkOffset3D *offset, 1670bf215546Sopenharmony_ci VkExtent3D *extent, 1671bf215546Sopenharmony_ci uint32_t *width, 1672bf215546Sopenharmony_ci uint32_t *height) 1673bf215546Sopenharmony_ci{ 1674bf215546Sopenharmony_ci if (!vk_format_is_compressed(format)) 1675bf215546Sopenharmony_ci return; 1676bf215546Sopenharmony_ci 1677bf215546Sopenharmony_ci uint32_t block_width = vk_format_get_blockwidth(format); 1678bf215546Sopenharmony_ci uint32_t block_height = vk_format_get_blockheight(format); 1679bf215546Sopenharmony_ci 1680bf215546Sopenharmony_ci offset->x /= block_width; 1681bf215546Sopenharmony_ci offset->y /= block_height; 1682bf215546Sopenharmony_ci 1683bf215546Sopenharmony_ci if (extent) { 1684bf215546Sopenharmony_ci extent->width = DIV_ROUND_UP(extent->width, block_width); 1685bf215546Sopenharmony_ci extent->height = DIV_ROUND_UP(extent->height, block_height); 1686bf215546Sopenharmony_ci } 1687bf215546Sopenharmony_ci if (width) 1688bf215546Sopenharmony_ci *width = DIV_ROUND_UP(*width, block_width); 1689bf215546Sopenharmony_ci if (height) 1690bf215546Sopenharmony_ci *height = DIV_ROUND_UP(*height, block_height); 1691bf215546Sopenharmony_ci} 1692bf215546Sopenharmony_ci 1693bf215546Sopenharmony_cistatic void 1694bf215546Sopenharmony_citu_copy_buffer_to_image(struct tu_cmd_buffer *cmd, 1695bf215546Sopenharmony_ci struct tu_buffer *src_buffer, 1696bf215546Sopenharmony_ci struct tu_image *dst_image, 1697bf215546Sopenharmony_ci const VkBufferImageCopy2 *info) 1698bf215546Sopenharmony_ci{ 1699bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->cs; 1700bf215546Sopenharmony_ci uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount); 1701bf215546Sopenharmony_ci enum pipe_format src_format = 1702bf215546Sopenharmony_ci copy_format(dst_image->vk.format, info->imageSubresource.aspectMask); 1703bf215546Sopenharmony_ci enum pipe_format dst_format = 1704bf215546Sopenharmony_ci copy_format(dst_image->vk.format, info->imageSubresource.aspectMask); 1705bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 1706bf215546Sopenharmony_ci 1707bf215546Sopenharmony_ci /* special case for buffer to stencil */ 1708bf215546Sopenharmony_ci if (dst_image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT && 1709bf215546Sopenharmony_ci info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { 1710bf215546Sopenharmony_ci src_format = PIPE_FORMAT_S8_UINT; 1711bf215546Sopenharmony_ci } 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_ci /* note: could use "R8_UNORM" when no UBWC */ 1714bf215546Sopenharmony_ci if (src_format == PIPE_FORMAT_Y8_UNORM) 1715bf215546Sopenharmony_ci ops = &r3d_ops; 1716bf215546Sopenharmony_ci 1717bf215546Sopenharmony_ci VkOffset3D offset = info->imageOffset; 1718bf215546Sopenharmony_ci VkExtent3D extent = info->imageExtent; 1719bf215546Sopenharmony_ci uint32_t src_width = info->bufferRowLength ?: extent.width; 1720bf215546Sopenharmony_ci uint32_t src_height = info->bufferImageHeight ?: extent.height; 1721bf215546Sopenharmony_ci 1722bf215546Sopenharmony_ci copy_compressed(dst_image->vk.format, &offset, &extent, &src_width, &src_height); 1723bf215546Sopenharmony_ci 1724bf215546Sopenharmony_ci uint32_t pitch = src_width * util_format_get_blocksize(src_format); 1725bf215546Sopenharmony_ci uint32_t layer_size = src_height * pitch; 1726bf215546Sopenharmony_ci 1727bf215546Sopenharmony_ci ops->setup(cmd, cs, src_format, dst_format, 1728bf215546Sopenharmony_ci info->imageSubresource.aspectMask, 0, false, dst_image->layout[0].ubwc, 1729bf215546Sopenharmony_ci dst_image->layout[0].nr_samples); 1730bf215546Sopenharmony_ci 1731bf215546Sopenharmony_ci struct fdl6_view dst; 1732bf215546Sopenharmony_ci tu_image_view_copy(&dst, dst_image, dst_format, &info->imageSubresource, offset.z); 1733bf215546Sopenharmony_ci 1734bf215546Sopenharmony_ci for (uint32_t i = 0; i < layers; i++) { 1735bf215546Sopenharmony_ci ops->dst(cs, &dst, i, src_format); 1736bf215546Sopenharmony_ci 1737bf215546Sopenharmony_ci uint64_t src_va = src_buffer->iova + info->bufferOffset + layer_size * i; 1738bf215546Sopenharmony_ci if ((src_va & 63) || (pitch & 63)) { 1739bf215546Sopenharmony_ci for (uint32_t y = 0; y < extent.height; y++) { 1740bf215546Sopenharmony_ci uint32_t x = (src_va & 63) / util_format_get_blocksize(src_format); 1741bf215546Sopenharmony_ci ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch, 1742bf215546Sopenharmony_ci x + extent.width, 1, dst_format); 1743bf215546Sopenharmony_ci ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x}, 1744bf215546Sopenharmony_ci &(VkExtent2D) {extent.width, 1}); 1745bf215546Sopenharmony_ci ops->run(cmd, cs); 1746bf215546Sopenharmony_ci src_va += pitch; 1747bf215546Sopenharmony_ci } 1748bf215546Sopenharmony_ci } else { 1749bf215546Sopenharmony_ci ops->src_buffer(cmd, cs, src_format, src_va, pitch, extent.width, extent.height, dst_format); 1750bf215546Sopenharmony_ci coords(ops, cs, &offset, &(VkOffset3D){}, &extent); 1751bf215546Sopenharmony_ci ops->run(cmd, cs); 1752bf215546Sopenharmony_ci } 1753bf215546Sopenharmony_ci } 1754bf215546Sopenharmony_ci 1755bf215546Sopenharmony_ci ops->teardown(cmd, cs); 1756bf215546Sopenharmony_ci} 1757bf215546Sopenharmony_ci 1758bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1759bf215546Sopenharmony_citu_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer, 1760bf215546Sopenharmony_ci const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) 1761bf215546Sopenharmony_ci{ 1762bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 1763bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, dst_image, pCopyBufferToImageInfo->dstImage); 1764bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer); 1765bf215546Sopenharmony_ci 1766bf215546Sopenharmony_ci for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; ++i) 1767bf215546Sopenharmony_ci tu_copy_buffer_to_image(cmd, src_buffer, dst_image, 1768bf215546Sopenharmony_ci pCopyBufferToImageInfo->pRegions + i); 1769bf215546Sopenharmony_ci 1770bf215546Sopenharmony_ci if (dst_image->lrz_height) { 1771bf215546Sopenharmony_ci tu_disable_lrz(cmd, &cmd->cs, dst_image); 1772bf215546Sopenharmony_ci } 1773bf215546Sopenharmony_ci} 1774bf215546Sopenharmony_ci 1775bf215546Sopenharmony_cistatic void 1776bf215546Sopenharmony_citu_copy_image_to_buffer(struct tu_cmd_buffer *cmd, 1777bf215546Sopenharmony_ci struct tu_image *src_image, 1778bf215546Sopenharmony_ci struct tu_buffer *dst_buffer, 1779bf215546Sopenharmony_ci const VkBufferImageCopy2 *info) 1780bf215546Sopenharmony_ci{ 1781bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->cs; 1782bf215546Sopenharmony_ci uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount); 1783bf215546Sopenharmony_ci enum pipe_format dst_format = 1784bf215546Sopenharmony_ci copy_format(src_image->vk.format, info->imageSubresource.aspectMask); 1785bf215546Sopenharmony_ci enum pipe_format src_format = 1786bf215546Sopenharmony_ci copy_format(src_image->vk.format, info->imageSubresource.aspectMask); 1787bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 1788bf215546Sopenharmony_ci 1789bf215546Sopenharmony_ci if (src_image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT && 1790bf215546Sopenharmony_ci info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) { 1791bf215546Sopenharmony_ci dst_format = PIPE_FORMAT_S8_UINT; 1792bf215546Sopenharmony_ci } 1793bf215546Sopenharmony_ci 1794bf215546Sopenharmony_ci /* note: could use "R8_UNORM" when no UBWC */ 1795bf215546Sopenharmony_ci if (dst_format == PIPE_FORMAT_Y8_UNORM) 1796bf215546Sopenharmony_ci ops = &r3d_ops; 1797bf215546Sopenharmony_ci 1798bf215546Sopenharmony_ci VkOffset3D offset = info->imageOffset; 1799bf215546Sopenharmony_ci VkExtent3D extent = info->imageExtent; 1800bf215546Sopenharmony_ci uint32_t dst_width = info->bufferRowLength ?: extent.width; 1801bf215546Sopenharmony_ci uint32_t dst_height = info->bufferImageHeight ?: extent.height; 1802bf215546Sopenharmony_ci 1803bf215546Sopenharmony_ci copy_compressed(src_image->vk.format, &offset, &extent, &dst_width, &dst_height); 1804bf215546Sopenharmony_ci 1805bf215546Sopenharmony_ci uint32_t pitch = dst_width * util_format_get_blocksize(dst_format); 1806bf215546Sopenharmony_ci uint32_t layer_size = pitch * dst_height; 1807bf215546Sopenharmony_ci 1808bf215546Sopenharmony_ci ops->setup(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false, 1809bf215546Sopenharmony_ci VK_SAMPLE_COUNT_1_BIT); 1810bf215546Sopenharmony_ci 1811bf215546Sopenharmony_ci struct fdl6_view src; 1812bf215546Sopenharmony_ci tu_image_view_copy(&src, src_image, src_format, &info->imageSubresource, offset.z); 1813bf215546Sopenharmony_ci 1814bf215546Sopenharmony_ci for (uint32_t i = 0; i < layers; i++) { 1815bf215546Sopenharmony_ci ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, dst_format); 1816bf215546Sopenharmony_ci 1817bf215546Sopenharmony_ci uint64_t dst_va = dst_buffer->iova + info->bufferOffset + layer_size * i; 1818bf215546Sopenharmony_ci if ((dst_va & 63) || (pitch & 63)) { 1819bf215546Sopenharmony_ci for (uint32_t y = 0; y < extent.height; y++) { 1820bf215546Sopenharmony_ci uint32_t x = (dst_va & 63) / util_format_get_blocksize(dst_format); 1821bf215546Sopenharmony_ci ops->dst_buffer(cs, dst_format, dst_va & ~63, 0, src_format); 1822bf215546Sopenharmony_ci ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y}, 1823bf215546Sopenharmony_ci &(VkExtent2D) {extent.width, 1}); 1824bf215546Sopenharmony_ci ops->run(cmd, cs); 1825bf215546Sopenharmony_ci dst_va += pitch; 1826bf215546Sopenharmony_ci } 1827bf215546Sopenharmony_ci } else { 1828bf215546Sopenharmony_ci ops->dst_buffer(cs, dst_format, dst_va, pitch, src_format); 1829bf215546Sopenharmony_ci coords(ops, cs, &(VkOffset3D) {0, 0}, &offset, &extent); 1830bf215546Sopenharmony_ci ops->run(cmd, cs); 1831bf215546Sopenharmony_ci } 1832bf215546Sopenharmony_ci } 1833bf215546Sopenharmony_ci 1834bf215546Sopenharmony_ci ops->teardown(cmd, cs); 1835bf215546Sopenharmony_ci} 1836bf215546Sopenharmony_ci 1837bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 1838bf215546Sopenharmony_citu_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer, 1839bf215546Sopenharmony_ci const VkCopyImageToBufferInfo2* pCopyImageToBufferInfo) 1840bf215546Sopenharmony_ci{ 1841bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 1842bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, src_image, pCopyImageToBufferInfo->srcImage); 1843bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer); 1844bf215546Sopenharmony_ci 1845bf215546Sopenharmony_ci for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; ++i) 1846bf215546Sopenharmony_ci tu_copy_image_to_buffer(cmd, src_image, dst_buffer, 1847bf215546Sopenharmony_ci pCopyImageToBufferInfo->pRegions + i); 1848bf215546Sopenharmony_ci} 1849bf215546Sopenharmony_ci 1850bf215546Sopenharmony_ci/* Tiled formats don't support swapping, which means that we can't support 1851bf215546Sopenharmony_ci * formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some 1852bf215546Sopenharmony_ci * formats like B5G5R5A1 have a separate linear-only format when sampling. 1853bf215546Sopenharmony_ci * Currently we fake support for tiled swapped formats and use the unswapped 1854bf215546Sopenharmony_ci * format instead, but this means that reinterpreting copies to and from 1855bf215546Sopenharmony_ci * swapped formats can't be performed correctly unless we can swizzle the 1856bf215546Sopenharmony_ci * components by reinterpreting the other image as the "correct" swapped 1857bf215546Sopenharmony_ci * format, i.e. only when the other image is linear. 1858bf215546Sopenharmony_ci */ 1859bf215546Sopenharmony_ci 1860bf215546Sopenharmony_cistatic bool 1861bf215546Sopenharmony_ciis_swapped_format(enum pipe_format format) 1862bf215546Sopenharmony_ci{ 1863bf215546Sopenharmony_ci struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR); 1864bf215546Sopenharmony_ci struct tu_native_format tiled = tu6_format_texture(format, TILE6_3); 1865bf215546Sopenharmony_ci return linear.fmt != tiled.fmt || linear.swap != tiled.swap; 1866bf215546Sopenharmony_ci} 1867bf215546Sopenharmony_ci 1868bf215546Sopenharmony_ci/* R8G8_* formats have a different tiling layout than other cpp=2 formats, and 1869bf215546Sopenharmony_ci * therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice 1870bf215546Sopenharmony_ci * versa). This should mirror the logic in fdl6_layout. 1871bf215546Sopenharmony_ci */ 1872bf215546Sopenharmony_cistatic bool 1873bf215546Sopenharmony_ciimage_is_r8g8(struct tu_image *image) 1874bf215546Sopenharmony_ci{ 1875bf215546Sopenharmony_ci return image->layout[0].cpp == 2 && 1876bf215546Sopenharmony_ci vk_format_get_nr_components(image->vk.format) == 2; 1877bf215546Sopenharmony_ci} 1878bf215546Sopenharmony_ci 1879bf215546Sopenharmony_cistatic void 1880bf215546Sopenharmony_citu_copy_image_to_image(struct tu_cmd_buffer *cmd, 1881bf215546Sopenharmony_ci struct tu_image *src_image, 1882bf215546Sopenharmony_ci struct tu_image *dst_image, 1883bf215546Sopenharmony_ci const VkImageCopy2 *info) 1884bf215546Sopenharmony_ci{ 1885bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 1886bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->cs; 1887bf215546Sopenharmony_ci 1888bf215546Sopenharmony_ci if (dst_image->layout[0].nr_samples > 1) 1889bf215546Sopenharmony_ci ops = &r3d_ops; 1890bf215546Sopenharmony_ci 1891bf215546Sopenharmony_ci enum pipe_format format = PIPE_FORMAT_NONE; 1892bf215546Sopenharmony_ci VkOffset3D src_offset = info->srcOffset; 1893bf215546Sopenharmony_ci VkOffset3D dst_offset = info->dstOffset; 1894bf215546Sopenharmony_ci VkExtent3D extent = info->extent; 1895bf215546Sopenharmony_ci uint32_t layers_to_copy = MAX2(info->extent.depth, info->srcSubresource.layerCount); 1896bf215546Sopenharmony_ci 1897bf215546Sopenharmony_ci /* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between 1898bf215546Sopenharmony_ci * Images": 1899bf215546Sopenharmony_ci * 1900bf215546Sopenharmony_ci * When copying between compressed and uncompressed formats the extent 1901bf215546Sopenharmony_ci * members represent the texel dimensions of the source image and not 1902bf215546Sopenharmony_ci * the destination. When copying from a compressed image to an 1903bf215546Sopenharmony_ci * uncompressed image the image texel dimensions written to the 1904bf215546Sopenharmony_ci * uncompressed image will be source extent divided by the compressed 1905bf215546Sopenharmony_ci * texel block dimensions. When copying from an uncompressed image to a 1906bf215546Sopenharmony_ci * compressed image the image texel dimensions written to the compressed 1907bf215546Sopenharmony_ci * image will be the source extent multiplied by the compressed texel 1908bf215546Sopenharmony_ci * block dimensions. 1909bf215546Sopenharmony_ci * 1910bf215546Sopenharmony_ci * This means we only have to adjust the extent if the source image is 1911bf215546Sopenharmony_ci * compressed. 1912bf215546Sopenharmony_ci */ 1913bf215546Sopenharmony_ci copy_compressed(src_image->vk.format, &src_offset, &extent, NULL, NULL); 1914bf215546Sopenharmony_ci copy_compressed(dst_image->vk.format, &dst_offset, NULL, NULL, NULL); 1915bf215546Sopenharmony_ci 1916bf215546Sopenharmony_ci enum pipe_format dst_format = copy_format(dst_image->vk.format, info->dstSubresource.aspectMask); 1917bf215546Sopenharmony_ci enum pipe_format src_format = copy_format(src_image->vk.format, info->srcSubresource.aspectMask); 1918bf215546Sopenharmony_ci 1919bf215546Sopenharmony_ci /* note: could use "R8_UNORM" when no UBWC */ 1920bf215546Sopenharmony_ci if (dst_format == PIPE_FORMAT_Y8_UNORM || 1921bf215546Sopenharmony_ci src_format == PIPE_FORMAT_Y8_UNORM) 1922bf215546Sopenharmony_ci ops = &r3d_ops; 1923bf215546Sopenharmony_ci 1924bf215546Sopenharmony_ci bool use_staging_blit = false; 1925bf215546Sopenharmony_ci 1926bf215546Sopenharmony_ci if (src_format == dst_format) { 1927bf215546Sopenharmony_ci /* Images that share a format can always be copied directly because it's 1928bf215546Sopenharmony_ci * the same as a blit. 1929bf215546Sopenharmony_ci */ 1930bf215546Sopenharmony_ci format = src_format; 1931bf215546Sopenharmony_ci } else if (!src_image->layout[0].tile_mode) { 1932bf215546Sopenharmony_ci /* If an image is linear, we can always safely reinterpret it with the 1933bf215546Sopenharmony_ci * other image's format and then do a regular blit. 1934bf215546Sopenharmony_ci */ 1935bf215546Sopenharmony_ci format = dst_format; 1936bf215546Sopenharmony_ci } else if (!dst_image->layout[0].tile_mode) { 1937bf215546Sopenharmony_ci format = src_format; 1938bf215546Sopenharmony_ci } else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) { 1939bf215546Sopenharmony_ci /* We can't currently copy r8g8 images to/from other cpp=2 images, 1940bf215546Sopenharmony_ci * due to the different tile layout. 1941bf215546Sopenharmony_ci */ 1942bf215546Sopenharmony_ci use_staging_blit = true; 1943bf215546Sopenharmony_ci } else if (is_swapped_format(src_format) || 1944bf215546Sopenharmony_ci is_swapped_format(dst_format)) { 1945bf215546Sopenharmony_ci /* If either format has a non-identity swap, then we can't copy 1946bf215546Sopenharmony_ci * to/from it. 1947bf215546Sopenharmony_ci */ 1948bf215546Sopenharmony_ci use_staging_blit = true; 1949bf215546Sopenharmony_ci } else if (!src_image->layout[0].ubwc) { 1950bf215546Sopenharmony_ci format = dst_format; 1951bf215546Sopenharmony_ci } else if (!dst_image->layout[0].ubwc) { 1952bf215546Sopenharmony_ci format = src_format; 1953bf215546Sopenharmony_ci } else { 1954bf215546Sopenharmony_ci /* Both formats use UBWC and so neither can be reinterpreted. 1955bf215546Sopenharmony_ci * TODO: We could do an in-place decompression of the dst instead. 1956bf215546Sopenharmony_ci */ 1957bf215546Sopenharmony_ci perf_debug(cmd->device, "TODO: Do in-place UBWC decompression for UBWC->UBWC blits"); 1958bf215546Sopenharmony_ci use_staging_blit = true; 1959bf215546Sopenharmony_ci } 1960bf215546Sopenharmony_ci 1961bf215546Sopenharmony_ci struct fdl6_view dst, src; 1962bf215546Sopenharmony_ci 1963bf215546Sopenharmony_ci if (use_staging_blit) { 1964bf215546Sopenharmony_ci tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z); 1965bf215546Sopenharmony_ci tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z); 1966bf215546Sopenharmony_ci 1967bf215546Sopenharmony_ci struct fdl_layout staging_layout = { 0 }; 1968bf215546Sopenharmony_ci VkOffset3D staging_offset = { 0 }; 1969bf215546Sopenharmony_ci 1970bf215546Sopenharmony_ci staging_layout.tile_mode = TILE6_LINEAR; 1971bf215546Sopenharmony_ci staging_layout.ubwc = false; 1972bf215546Sopenharmony_ci 1973bf215546Sopenharmony_ci fdl6_layout(&staging_layout, 1974bf215546Sopenharmony_ci src_format, 1975bf215546Sopenharmony_ci src_image->layout[0].nr_samples, 1976bf215546Sopenharmony_ci extent.width, 1977bf215546Sopenharmony_ci extent.height, 1978bf215546Sopenharmony_ci extent.depth, 1979bf215546Sopenharmony_ci 1, 1980bf215546Sopenharmony_ci info->srcSubresource.layerCount, 1981bf215546Sopenharmony_ci extent.depth > 1, 1982bf215546Sopenharmony_ci NULL); 1983bf215546Sopenharmony_ci 1984bf215546Sopenharmony_ci struct tu_bo *staging_bo; 1985bf215546Sopenharmony_ci VkResult result = tu_get_scratch_bo(cmd->device, 1986bf215546Sopenharmony_ci staging_layout.size, 1987bf215546Sopenharmony_ci &staging_bo); 1988bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 1989bf215546Sopenharmony_ci cmd->record_result = result; 1990bf215546Sopenharmony_ci return; 1991bf215546Sopenharmony_ci } 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci struct fdl6_view staging; 1994bf215546Sopenharmony_ci const struct fdl_layout *staging_layout_ptr = &staging_layout; 1995bf215546Sopenharmony_ci fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) { 1996bf215546Sopenharmony_ci .iova = staging_bo->iova, 1997bf215546Sopenharmony_ci .base_array_layer = 0, 1998bf215546Sopenharmony_ci .layer_count = 1, 1999bf215546Sopenharmony_ci .base_miplevel = 0, 2000bf215546Sopenharmony_ci .level_count = info->srcSubresource.layerCount, 2001bf215546Sopenharmony_ci .format = tu_format_for_aspect(src_format, VK_IMAGE_ASPECT_COLOR_BIT), 2002bf215546Sopenharmony_ci .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 2003bf215546Sopenharmony_ci .type = FDL_VIEW_TYPE_2D, 2004bf215546Sopenharmony_ci }, false); 2005bf215546Sopenharmony_ci 2006bf215546Sopenharmony_ci ops->setup(cmd, cs, src_format, src_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false, 2007bf215546Sopenharmony_ci dst_image->layout[0].nr_samples); 2008bf215546Sopenharmony_ci coords(ops, cs, &staging_offset, &src_offset, &extent); 2009bf215546Sopenharmony_ci 2010bf215546Sopenharmony_ci for (uint32_t i = 0; i < layers_to_copy; i++) { 2011bf215546Sopenharmony_ci ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, src_format); 2012bf215546Sopenharmony_ci ops->dst(cs, &staging, i, src_format); 2013bf215546Sopenharmony_ci ops->run(cmd, cs); 2014bf215546Sopenharmony_ci } 2015bf215546Sopenharmony_ci 2016bf215546Sopenharmony_ci /* When executed by the user there has to be a pipeline barrier here, 2017bf215546Sopenharmony_ci * but since we're doing it manually we'll have to flush ourselves. 2018bf215546Sopenharmony_ci */ 2019bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); 2020bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); 2021bf215546Sopenharmony_ci tu_cs_emit_wfi(cs); 2022bf215546Sopenharmony_ci 2023bf215546Sopenharmony_ci fdl6_view_init(&staging, &staging_layout_ptr, &(struct fdl_view_args) { 2024bf215546Sopenharmony_ci .iova = staging_bo->iova, 2025bf215546Sopenharmony_ci .base_array_layer = 0, 2026bf215546Sopenharmony_ci .layer_count = 1, 2027bf215546Sopenharmony_ci .base_miplevel = 0, 2028bf215546Sopenharmony_ci .level_count = info->srcSubresource.layerCount, 2029bf215546Sopenharmony_ci .format = tu_format_for_aspect(dst_format, VK_IMAGE_ASPECT_COLOR_BIT), 2030bf215546Sopenharmony_ci .swiz = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 2031bf215546Sopenharmony_ci .type = FDL_VIEW_TYPE_2D, 2032bf215546Sopenharmony_ci }, false); 2033bf215546Sopenharmony_ci 2034bf215546Sopenharmony_ci ops->setup(cmd, cs, dst_format, dst_format, info->dstSubresource.aspectMask, 2035bf215546Sopenharmony_ci 0, false, dst_image->layout[0].ubwc, 2036bf215546Sopenharmony_ci dst_image->layout[0].nr_samples); 2037bf215546Sopenharmony_ci coords(ops, cs, &dst_offset, &staging_offset, &extent); 2038bf215546Sopenharmony_ci 2039bf215546Sopenharmony_ci for (uint32_t i = 0; i < layers_to_copy; i++) { 2040bf215546Sopenharmony_ci ops->src(cmd, cs, &staging, i, VK_FILTER_NEAREST, dst_format); 2041bf215546Sopenharmony_ci ops->dst(cs, &dst, i, dst_format); 2042bf215546Sopenharmony_ci ops->run(cmd, cs); 2043bf215546Sopenharmony_ci } 2044bf215546Sopenharmony_ci } else { 2045bf215546Sopenharmony_ci tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z); 2046bf215546Sopenharmony_ci tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z); 2047bf215546Sopenharmony_ci 2048bf215546Sopenharmony_ci ops->setup(cmd, cs, format, format, info->dstSubresource.aspectMask, 2049bf215546Sopenharmony_ci 0, false, dst_image->layout[0].ubwc, 2050bf215546Sopenharmony_ci dst_image->layout[0].nr_samples); 2051bf215546Sopenharmony_ci coords(ops, cs, &dst_offset, &src_offset, &extent); 2052bf215546Sopenharmony_ci 2053bf215546Sopenharmony_ci for (uint32_t i = 0; i < layers_to_copy; i++) { 2054bf215546Sopenharmony_ci ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, format); 2055bf215546Sopenharmony_ci ops->dst(cs, &dst, i, format); 2056bf215546Sopenharmony_ci ops->run(cmd, cs); 2057bf215546Sopenharmony_ci } 2058bf215546Sopenharmony_ci } 2059bf215546Sopenharmony_ci 2060bf215546Sopenharmony_ci ops->teardown(cmd, cs); 2061bf215546Sopenharmony_ci} 2062bf215546Sopenharmony_ci 2063bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2064bf215546Sopenharmony_citu_CmdCopyImage2KHR(VkCommandBuffer commandBuffer, 2065bf215546Sopenharmony_ci const VkCopyImageInfo2* pCopyImageInfo) 2066bf215546Sopenharmony_ci{ 2067bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 2068bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, src_image, pCopyImageInfo->srcImage); 2069bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, dst_image, pCopyImageInfo->dstImage); 2070bf215546Sopenharmony_ci 2071bf215546Sopenharmony_ci for (uint32_t i = 0; i < pCopyImageInfo->regionCount; ++i) { 2072bf215546Sopenharmony_ci if (src_image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 2073bf215546Sopenharmony_ci VkImageCopy2 info = pCopyImageInfo->pRegions[i]; 2074bf215546Sopenharmony_ci u_foreach_bit(b, info.dstSubresource.aspectMask) { 2075bf215546Sopenharmony_ci info.srcSubresource.aspectMask = BIT(b); 2076bf215546Sopenharmony_ci info.dstSubresource.aspectMask = BIT(b); 2077bf215546Sopenharmony_ci tu_copy_image_to_image(cmd, src_image, dst_image, &info); 2078bf215546Sopenharmony_ci } 2079bf215546Sopenharmony_ci continue; 2080bf215546Sopenharmony_ci } 2081bf215546Sopenharmony_ci 2082bf215546Sopenharmony_ci tu_copy_image_to_image(cmd, src_image, dst_image, 2083bf215546Sopenharmony_ci pCopyImageInfo->pRegions + i); 2084bf215546Sopenharmony_ci } 2085bf215546Sopenharmony_ci 2086bf215546Sopenharmony_ci if (dst_image->lrz_height) { 2087bf215546Sopenharmony_ci tu_disable_lrz(cmd, &cmd->cs, dst_image); 2088bf215546Sopenharmony_ci } 2089bf215546Sopenharmony_ci} 2090bf215546Sopenharmony_ci 2091bf215546Sopenharmony_cistatic void 2092bf215546Sopenharmony_cicopy_buffer(struct tu_cmd_buffer *cmd, 2093bf215546Sopenharmony_ci uint64_t dst_va, 2094bf215546Sopenharmony_ci uint64_t src_va, 2095bf215546Sopenharmony_ci uint64_t size, 2096bf215546Sopenharmony_ci uint32_t block_size) 2097bf215546Sopenharmony_ci{ 2098bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 2099bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->cs; 2100bf215546Sopenharmony_ci enum pipe_format format = block_size == 4 ? PIPE_FORMAT_R32_UINT : PIPE_FORMAT_R8_UNORM; 2101bf215546Sopenharmony_ci uint64_t blocks = size / block_size; 2102bf215546Sopenharmony_ci 2103bf215546Sopenharmony_ci ops->setup(cmd, cs, format, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false, 2104bf215546Sopenharmony_ci VK_SAMPLE_COUNT_1_BIT); 2105bf215546Sopenharmony_ci 2106bf215546Sopenharmony_ci while (blocks) { 2107bf215546Sopenharmony_ci uint32_t src_x = (src_va & 63) / block_size; 2108bf215546Sopenharmony_ci uint32_t dst_x = (dst_va & 63) / block_size; 2109bf215546Sopenharmony_ci uint32_t width = MIN2(MIN2(blocks, 0x4000 - src_x), 0x4000 - dst_x); 2110bf215546Sopenharmony_ci 2111bf215546Sopenharmony_ci ops->src_buffer(cmd, cs, format, src_va & ~63, 0, src_x + width, 1, format); 2112bf215546Sopenharmony_ci ops->dst_buffer( cs, format, dst_va & ~63, 0, format); 2113bf215546Sopenharmony_ci ops->coords(cs, &(VkOffset2D) {dst_x}, &(VkOffset2D) {src_x}, &(VkExtent2D) {width, 1}); 2114bf215546Sopenharmony_ci ops->run(cmd, cs); 2115bf215546Sopenharmony_ci 2116bf215546Sopenharmony_ci src_va += width * block_size; 2117bf215546Sopenharmony_ci dst_va += width * block_size; 2118bf215546Sopenharmony_ci blocks -= width; 2119bf215546Sopenharmony_ci } 2120bf215546Sopenharmony_ci 2121bf215546Sopenharmony_ci ops->teardown(cmd, cs); 2122bf215546Sopenharmony_ci} 2123bf215546Sopenharmony_ci 2124bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2125bf215546Sopenharmony_citu_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer, 2126bf215546Sopenharmony_ci const VkCopyBufferInfo2 *pCopyBufferInfo) 2127bf215546Sopenharmony_ci{ 2128bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 2129bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_buffer, src_buffer, pCopyBufferInfo->srcBuffer); 2130bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_buffer, dst_buffer, pCopyBufferInfo->dstBuffer); 2131bf215546Sopenharmony_ci 2132bf215546Sopenharmony_ci for (unsigned i = 0; i < pCopyBufferInfo->regionCount; ++i) { 2133bf215546Sopenharmony_ci const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[i]; 2134bf215546Sopenharmony_ci copy_buffer(cmd, 2135bf215546Sopenharmony_ci dst_buffer->iova + region->dstOffset, 2136bf215546Sopenharmony_ci src_buffer->iova + region->srcOffset, 2137bf215546Sopenharmony_ci region->size, 1); 2138bf215546Sopenharmony_ci } 2139bf215546Sopenharmony_ci} 2140bf215546Sopenharmony_ci 2141bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2142bf215546Sopenharmony_citu_CmdUpdateBuffer(VkCommandBuffer commandBuffer, 2143bf215546Sopenharmony_ci VkBuffer dstBuffer, 2144bf215546Sopenharmony_ci VkDeviceSize dstOffset, 2145bf215546Sopenharmony_ci VkDeviceSize dataSize, 2146bf215546Sopenharmony_ci const void *pData) 2147bf215546Sopenharmony_ci{ 2148bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 2149bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer); 2150bf215546Sopenharmony_ci 2151bf215546Sopenharmony_ci struct tu_cs_memory tmp; 2152bf215546Sopenharmony_ci VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64 / 4, &tmp); 2153bf215546Sopenharmony_ci if (result != VK_SUCCESS) { 2154bf215546Sopenharmony_ci cmd->record_result = result; 2155bf215546Sopenharmony_ci return; 2156bf215546Sopenharmony_ci } 2157bf215546Sopenharmony_ci 2158bf215546Sopenharmony_ci memcpy(tmp.map, pData, dataSize); 2159bf215546Sopenharmony_ci copy_buffer(cmd, buffer->iova + dstOffset, tmp.iova, dataSize, 4); 2160bf215546Sopenharmony_ci} 2161bf215546Sopenharmony_ci 2162bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2163bf215546Sopenharmony_citu_CmdFillBuffer(VkCommandBuffer commandBuffer, 2164bf215546Sopenharmony_ci VkBuffer dstBuffer, 2165bf215546Sopenharmony_ci VkDeviceSize dstOffset, 2166bf215546Sopenharmony_ci VkDeviceSize fillSize, 2167bf215546Sopenharmony_ci uint32_t data) 2168bf215546Sopenharmony_ci{ 2169bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 2170bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer); 2171bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 2172bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->cs; 2173bf215546Sopenharmony_ci 2174bf215546Sopenharmony_ci if (fillSize == VK_WHOLE_SIZE) 2175bf215546Sopenharmony_ci fillSize = buffer->size - dstOffset; 2176bf215546Sopenharmony_ci 2177bf215546Sopenharmony_ci uint64_t dst_va = buffer->iova + dstOffset; 2178bf215546Sopenharmony_ci uint32_t blocks = fillSize / 4; 2179bf215546Sopenharmony_ci 2180bf215546Sopenharmony_ci ops->setup(cmd, cs, PIPE_FORMAT_R32_UINT, PIPE_FORMAT_R32_UINT, 2181bf215546Sopenharmony_ci VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false, 2182bf215546Sopenharmony_ci VK_SAMPLE_COUNT_1_BIT); 2183bf215546Sopenharmony_ci ops->clear_value(cs, PIPE_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}}); 2184bf215546Sopenharmony_ci 2185bf215546Sopenharmony_ci while (blocks) { 2186bf215546Sopenharmony_ci uint32_t dst_x = (dst_va & 63) / 4; 2187bf215546Sopenharmony_ci uint32_t width = MIN2(blocks, 0x4000 - dst_x); 2188bf215546Sopenharmony_ci 2189bf215546Sopenharmony_ci ops->dst_buffer(cs, PIPE_FORMAT_R32_UINT, dst_va & ~63, 0, PIPE_FORMAT_R32_UINT); 2190bf215546Sopenharmony_ci ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1}); 2191bf215546Sopenharmony_ci ops->run(cmd, cs); 2192bf215546Sopenharmony_ci 2193bf215546Sopenharmony_ci dst_va += width * 4; 2194bf215546Sopenharmony_ci blocks -= width; 2195bf215546Sopenharmony_ci } 2196bf215546Sopenharmony_ci 2197bf215546Sopenharmony_ci ops->teardown(cmd, cs); 2198bf215546Sopenharmony_ci} 2199bf215546Sopenharmony_ci 2200bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2201bf215546Sopenharmony_citu_CmdResolveImage2KHR(VkCommandBuffer commandBuffer, 2202bf215546Sopenharmony_ci const VkResolveImageInfo2* pResolveImageInfo) 2203bf215546Sopenharmony_ci{ 2204bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 2205bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, src_image, pResolveImageInfo->srcImage); 2206bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, dst_image, pResolveImageInfo->dstImage); 2207bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 2208bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->cs; 2209bf215546Sopenharmony_ci 2210bf215546Sopenharmony_ci enum pipe_format src_format = 2211bf215546Sopenharmony_ci tu_vk_format_to_pipe_format(src_image->vk.format); 2212bf215546Sopenharmony_ci enum pipe_format dst_format = 2213bf215546Sopenharmony_ci tu_vk_format_to_pipe_format(dst_image->vk.format); 2214bf215546Sopenharmony_ci ops->setup(cmd, cs, src_format, dst_format, 2215bf215546Sopenharmony_ci VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst_image->layout[0].ubwc, 2216bf215546Sopenharmony_ci VK_SAMPLE_COUNT_1_BIT); 2217bf215546Sopenharmony_ci 2218bf215546Sopenharmony_ci for (uint32_t i = 0; i < pResolveImageInfo->regionCount; ++i) { 2219bf215546Sopenharmony_ci const VkImageResolve2 *info = &pResolveImageInfo->pRegions[i]; 2220bf215546Sopenharmony_ci uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount); 2221bf215546Sopenharmony_ci 2222bf215546Sopenharmony_ci assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount); 2223bf215546Sopenharmony_ci /* TODO: aspect masks possible ? */ 2224bf215546Sopenharmony_ci 2225bf215546Sopenharmony_ci coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent); 2226bf215546Sopenharmony_ci 2227bf215546Sopenharmony_ci struct fdl6_view dst, src; 2228bf215546Sopenharmony_ci tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffset.z); 2229bf215546Sopenharmony_ci tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffset.z); 2230bf215546Sopenharmony_ci 2231bf215546Sopenharmony_ci for (uint32_t i = 0; i < layers; i++) { 2232bf215546Sopenharmony_ci ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST, dst_format); 2233bf215546Sopenharmony_ci ops->dst(cs, &dst, i, src_format); 2234bf215546Sopenharmony_ci ops->run(cmd, cs); 2235bf215546Sopenharmony_ci } 2236bf215546Sopenharmony_ci } 2237bf215546Sopenharmony_ci 2238bf215546Sopenharmony_ci ops->teardown(cmd, cs); 2239bf215546Sopenharmony_ci} 2240bf215546Sopenharmony_ci 2241bf215546Sopenharmony_ci#define for_each_layer(layer, layer_mask, layers) \ 2242bf215546Sopenharmony_ci for (uint32_t layer = 0; \ 2243bf215546Sopenharmony_ci layer < ((layer_mask) ? (util_logbase2(layer_mask) + 1) : layers); \ 2244bf215546Sopenharmony_ci layer++) \ 2245bf215546Sopenharmony_ci if (!layer_mask || (layer_mask & BIT(layer))) 2246bf215546Sopenharmony_ci 2247bf215546Sopenharmony_cistatic void 2248bf215546Sopenharmony_ciresolve_sysmem(struct tu_cmd_buffer *cmd, 2249bf215546Sopenharmony_ci struct tu_cs *cs, 2250bf215546Sopenharmony_ci VkFormat vk_src_format, 2251bf215546Sopenharmony_ci VkFormat vk_dst_format, 2252bf215546Sopenharmony_ci const struct tu_image_view *src, 2253bf215546Sopenharmony_ci const struct tu_image_view *dst, 2254bf215546Sopenharmony_ci uint32_t layer_mask, 2255bf215546Sopenharmony_ci uint32_t layers, 2256bf215546Sopenharmony_ci const VkRect2D *rect, 2257bf215546Sopenharmony_ci bool src_separate_ds, 2258bf215546Sopenharmony_ci bool dst_separate_ds) 2259bf215546Sopenharmony_ci{ 2260bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 2261bf215546Sopenharmony_ci 2262bf215546Sopenharmony_ci trace_start_sysmem_resolve(&cmd->trace, cs); 2263bf215546Sopenharmony_ci 2264bf215546Sopenharmony_ci enum pipe_format src_format = tu_vk_format_to_pipe_format(vk_src_format); 2265bf215546Sopenharmony_ci enum pipe_format dst_format = tu_vk_format_to_pipe_format(vk_dst_format); 2266bf215546Sopenharmony_ci 2267bf215546Sopenharmony_ci ops->setup(cmd, cs, src_format, dst_format, 2268bf215546Sopenharmony_ci VK_IMAGE_ASPECT_COLOR_BIT, 0, false, dst->view.ubwc_enabled, 2269bf215546Sopenharmony_ci VK_SAMPLE_COUNT_1_BIT); 2270bf215546Sopenharmony_ci ops->coords(cs, &rect->offset, &rect->offset, &rect->extent); 2271bf215546Sopenharmony_ci 2272bf215546Sopenharmony_ci for_each_layer(i, layer_mask, layers) { 2273bf215546Sopenharmony_ci if (src_separate_ds) { 2274bf215546Sopenharmony_ci if (vk_src_format == VK_FORMAT_D32_SFLOAT) { 2275bf215546Sopenharmony_ci r2d_src_depth(cmd, cs, src, i, VK_FILTER_NEAREST); 2276bf215546Sopenharmony_ci } else { 2277bf215546Sopenharmony_ci r2d_src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST); 2278bf215546Sopenharmony_ci } 2279bf215546Sopenharmony_ci } else { 2280bf215546Sopenharmony_ci ops->src(cmd, cs, &src->view, i, VK_FILTER_NEAREST, dst_format); 2281bf215546Sopenharmony_ci } 2282bf215546Sopenharmony_ci 2283bf215546Sopenharmony_ci if (dst_separate_ds) { 2284bf215546Sopenharmony_ci if (vk_dst_format == VK_FORMAT_D32_SFLOAT) { 2285bf215546Sopenharmony_ci ops->dst_depth(cs, dst, i); 2286bf215546Sopenharmony_ci } else { 2287bf215546Sopenharmony_ci ops->dst_stencil(cs, dst, i); 2288bf215546Sopenharmony_ci } 2289bf215546Sopenharmony_ci } else { 2290bf215546Sopenharmony_ci ops->dst(cs, &dst->view, i, src_format); 2291bf215546Sopenharmony_ci } 2292bf215546Sopenharmony_ci 2293bf215546Sopenharmony_ci ops->run(cmd, cs); 2294bf215546Sopenharmony_ci } 2295bf215546Sopenharmony_ci 2296bf215546Sopenharmony_ci ops->teardown(cmd, cs); 2297bf215546Sopenharmony_ci 2298bf215546Sopenharmony_ci trace_end_sysmem_resolve(&cmd->trace, cs, vk_dst_format); 2299bf215546Sopenharmony_ci} 2300bf215546Sopenharmony_ci 2301bf215546Sopenharmony_civoid 2302bf215546Sopenharmony_citu_resolve_sysmem(struct tu_cmd_buffer *cmd, 2303bf215546Sopenharmony_ci struct tu_cs *cs, 2304bf215546Sopenharmony_ci const struct tu_image_view *src, 2305bf215546Sopenharmony_ci const struct tu_image_view *dst, 2306bf215546Sopenharmony_ci uint32_t layer_mask, 2307bf215546Sopenharmony_ci uint32_t layers, 2308bf215546Sopenharmony_ci const VkRect2D *rect) 2309bf215546Sopenharmony_ci{ 2310bf215546Sopenharmony_ci assert(src->image->vk.format == dst->image->vk.format || 2311bf215546Sopenharmony_ci (vk_format_is_depth_or_stencil(src->image->vk.format) && 2312bf215546Sopenharmony_ci vk_format_is_depth_or_stencil(dst->image->vk.format))); 2313bf215546Sopenharmony_ci 2314bf215546Sopenharmony_ci bool src_separate_ds = src->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT; 2315bf215546Sopenharmony_ci bool dst_separate_ds = dst->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT; 2316bf215546Sopenharmony_ci 2317bf215546Sopenharmony_ci if (dst_separate_ds) { 2318bf215546Sopenharmony_ci resolve_sysmem(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_FORMAT_D32_SFLOAT, 2319bf215546Sopenharmony_ci src, dst, layer_mask, layers, rect, 2320bf215546Sopenharmony_ci src_separate_ds, dst_separate_ds); 2321bf215546Sopenharmony_ci resolve_sysmem(cmd, cs, VK_FORMAT_S8_UINT, VK_FORMAT_S8_UINT, 2322bf215546Sopenharmony_ci src, dst, layer_mask, layers, rect, 2323bf215546Sopenharmony_ci src_separate_ds, dst_separate_ds); 2324bf215546Sopenharmony_ci } else { 2325bf215546Sopenharmony_ci resolve_sysmem(cmd, cs, src->image->vk.format, dst->image->vk.format, 2326bf215546Sopenharmony_ci src, dst, layer_mask, layers, rect, 2327bf215546Sopenharmony_ci src_separate_ds, dst_separate_ds); 2328bf215546Sopenharmony_ci } 2329bf215546Sopenharmony_ci} 2330bf215546Sopenharmony_ci 2331bf215546Sopenharmony_cistatic void 2332bf215546Sopenharmony_ciclear_image(struct tu_cmd_buffer *cmd, 2333bf215546Sopenharmony_ci struct tu_image *image, 2334bf215546Sopenharmony_ci const VkClearValue *clear_value, 2335bf215546Sopenharmony_ci const VkImageSubresourceRange *range, 2336bf215546Sopenharmony_ci VkImageAspectFlags aspect_mask) 2337bf215546Sopenharmony_ci{ 2338bf215546Sopenharmony_ci uint32_t level_count = vk_image_subresource_level_count(&image->vk, range); 2339bf215546Sopenharmony_ci uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range); 2340bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->cs; 2341bf215546Sopenharmony_ci enum pipe_format format; 2342bf215546Sopenharmony_ci if (image->vk.format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) { 2343bf215546Sopenharmony_ci format = PIPE_FORMAT_R32_UINT; 2344bf215546Sopenharmony_ci } else { 2345bf215546Sopenharmony_ci format = tu6_plane_format(image->vk.format, 2346bf215546Sopenharmony_ci tu6_plane_index(image->vk.format, 2347bf215546Sopenharmony_ci aspect_mask)); 2348bf215546Sopenharmony_ci } 2349bf215546Sopenharmony_ci 2350bf215546Sopenharmony_ci if (image->layout[0].depth0 > 1) { 2351bf215546Sopenharmony_ci assert(layer_count == 1); 2352bf215546Sopenharmony_ci assert(range->baseArrayLayer == 0); 2353bf215546Sopenharmony_ci } 2354bf215546Sopenharmony_ci 2355bf215546Sopenharmony_ci const struct blit_ops *ops = image->layout[0].nr_samples > 1 ? &r3d_ops : &r2d_ops; 2356bf215546Sopenharmony_ci 2357bf215546Sopenharmony_ci ops->setup(cmd, cs, format, format, aspect_mask, 0, true, image->layout[0].ubwc, 2358bf215546Sopenharmony_ci image->layout[0].nr_samples); 2359bf215546Sopenharmony_ci if (image->vk.format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) 2360bf215546Sopenharmony_ci ops->clear_value(cs, PIPE_FORMAT_R9G9B9E5_FLOAT, clear_value); 2361bf215546Sopenharmony_ci else 2362bf215546Sopenharmony_ci ops->clear_value(cs, format, clear_value); 2363bf215546Sopenharmony_ci 2364bf215546Sopenharmony_ci for (unsigned j = 0; j < level_count; j++) { 2365bf215546Sopenharmony_ci if (image->layout[0].depth0 > 1) 2366bf215546Sopenharmony_ci layer_count = u_minify(image->layout[0].depth0, range->baseMipLevel + j); 2367bf215546Sopenharmony_ci 2368bf215546Sopenharmony_ci ops->coords(cs, &(VkOffset2D){}, NULL, &(VkExtent2D) { 2369bf215546Sopenharmony_ci u_minify(image->layout[0].width0, range->baseMipLevel + j), 2370bf215546Sopenharmony_ci u_minify(image->layout[0].height0, range->baseMipLevel + j) 2371bf215546Sopenharmony_ci }); 2372bf215546Sopenharmony_ci 2373bf215546Sopenharmony_ci struct fdl6_view dst; 2374bf215546Sopenharmony_ci tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) { 2375bf215546Sopenharmony_ci .aspectMask = aspect_mask, 2376bf215546Sopenharmony_ci .mipLevel = range->baseMipLevel + j, 2377bf215546Sopenharmony_ci .baseArrayLayer = range->baseArrayLayer, 2378bf215546Sopenharmony_ci .layerCount = 1, 2379bf215546Sopenharmony_ci }, 0, false); 2380bf215546Sopenharmony_ci 2381bf215546Sopenharmony_ci for (uint32_t i = 0; i < layer_count; i++) { 2382bf215546Sopenharmony_ci ops->dst(cs, &dst, i, format); 2383bf215546Sopenharmony_ci ops->run(cmd, cs); 2384bf215546Sopenharmony_ci } 2385bf215546Sopenharmony_ci } 2386bf215546Sopenharmony_ci 2387bf215546Sopenharmony_ci ops->teardown(cmd, cs); 2388bf215546Sopenharmony_ci} 2389bf215546Sopenharmony_ci 2390bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2391bf215546Sopenharmony_citu_CmdClearColorImage(VkCommandBuffer commandBuffer, 2392bf215546Sopenharmony_ci VkImage image_h, 2393bf215546Sopenharmony_ci VkImageLayout imageLayout, 2394bf215546Sopenharmony_ci const VkClearColorValue *pColor, 2395bf215546Sopenharmony_ci uint32_t rangeCount, 2396bf215546Sopenharmony_ci const VkImageSubresourceRange *pRanges) 2397bf215546Sopenharmony_ci{ 2398bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 2399bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, image, image_h); 2400bf215546Sopenharmony_ci 2401bf215546Sopenharmony_ci for (unsigned i = 0; i < rangeCount; i++) 2402bf215546Sopenharmony_ci clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT); 2403bf215546Sopenharmony_ci} 2404bf215546Sopenharmony_ci 2405bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2406bf215546Sopenharmony_citu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, 2407bf215546Sopenharmony_ci VkImage image_h, 2408bf215546Sopenharmony_ci VkImageLayout imageLayout, 2409bf215546Sopenharmony_ci const VkClearDepthStencilValue *pDepthStencil, 2410bf215546Sopenharmony_ci uint32_t rangeCount, 2411bf215546Sopenharmony_ci const VkImageSubresourceRange *pRanges) 2412bf215546Sopenharmony_ci{ 2413bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 2414bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_image, image, image_h); 2415bf215546Sopenharmony_ci 2416bf215546Sopenharmony_ci for (unsigned i = 0; i < rangeCount; i++) { 2417bf215546Sopenharmony_ci const VkImageSubresourceRange *range = &pRanges[i]; 2418bf215546Sopenharmony_ci 2419bf215546Sopenharmony_ci if (image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 2420bf215546Sopenharmony_ci /* can't clear both depth and stencil at once, split up the aspect mask */ 2421bf215546Sopenharmony_ci u_foreach_bit(b, range->aspectMask) 2422bf215546Sopenharmony_ci clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b)); 2423bf215546Sopenharmony_ci continue; 2424bf215546Sopenharmony_ci } 2425bf215546Sopenharmony_ci 2426bf215546Sopenharmony_ci clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask); 2427bf215546Sopenharmony_ci } 2428bf215546Sopenharmony_ci 2429bf215546Sopenharmony_ci tu_lrz_clear_depth_image(cmd, image, pDepthStencil, rangeCount, pRanges); 2430bf215546Sopenharmony_ci} 2431bf215546Sopenharmony_ci 2432bf215546Sopenharmony_cistatic void 2433bf215546Sopenharmony_citu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd, 2434bf215546Sopenharmony_ci uint32_t attachment_count, 2435bf215546Sopenharmony_ci const VkClearAttachment *attachments, 2436bf215546Sopenharmony_ci uint32_t rect_count, 2437bf215546Sopenharmony_ci const VkClearRect *rects) 2438bf215546Sopenharmony_ci{ 2439bf215546Sopenharmony_ci /* the shader path here is special, it avoids changing MRT/etc state */ 2440bf215546Sopenharmony_ci const struct tu_subpass *subpass = cmd->state.subpass; 2441bf215546Sopenharmony_ci const uint32_t mrt_count = subpass->color_count; 2442bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->draw_cs; 2443bf215546Sopenharmony_ci uint32_t clear_value[MAX_RTS][4]; 2444bf215546Sopenharmony_ci float z_clear_val = 0.0f; 2445bf215546Sopenharmony_ci uint8_t s_clear_val = 0; 2446bf215546Sopenharmony_ci uint32_t clear_rts = 0, clear_components = 0; 2447bf215546Sopenharmony_ci bool z_clear = false; 2448bf215546Sopenharmony_ci bool s_clear = false; 2449bf215546Sopenharmony_ci 2450bf215546Sopenharmony_ci trace_start_sysmem_clear_all(&cmd->trace, cs); 2451bf215546Sopenharmony_ci 2452bf215546Sopenharmony_ci for (uint32_t i = 0; i < attachment_count; i++) { 2453bf215546Sopenharmony_ci uint32_t a; 2454bf215546Sopenharmony_ci if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { 2455bf215546Sopenharmony_ci uint32_t c = attachments[i].colorAttachment; 2456bf215546Sopenharmony_ci a = subpass->color_attachments[c].attachment; 2457bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 2458bf215546Sopenharmony_ci continue; 2459bf215546Sopenharmony_ci 2460bf215546Sopenharmony_ci clear_rts |= 1 << c; 2461bf215546Sopenharmony_ci clear_components |= 0xf << (c * 4); 2462bf215546Sopenharmony_ci memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t)); 2463bf215546Sopenharmony_ci } else { 2464bf215546Sopenharmony_ci a = subpass->depth_stencil_attachment.attachment; 2465bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 2466bf215546Sopenharmony_ci continue; 2467bf215546Sopenharmony_ci 2468bf215546Sopenharmony_ci if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) { 2469bf215546Sopenharmony_ci z_clear = true; 2470bf215546Sopenharmony_ci z_clear_val = attachments[i].clearValue.depthStencil.depth; 2471bf215546Sopenharmony_ci } 2472bf215546Sopenharmony_ci 2473bf215546Sopenharmony_ci if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) { 2474bf215546Sopenharmony_ci s_clear = true; 2475bf215546Sopenharmony_ci s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff; 2476bf215546Sopenharmony_ci } 2477bf215546Sopenharmony_ci } 2478bf215546Sopenharmony_ci } 2479bf215546Sopenharmony_ci 2480bf215546Sopenharmony_ci /* We may not know the multisample count if there are no attachments, so 2481bf215546Sopenharmony_ci * just bail early to avoid corner cases later. 2482bf215546Sopenharmony_ci */ 2483bf215546Sopenharmony_ci if (clear_rts == 0 && !z_clear && !s_clear) 2484bf215546Sopenharmony_ci return; 2485bf215546Sopenharmony_ci 2486bf215546Sopenharmony_ci /* disable all draw states so they don't interfere 2487bf215546Sopenharmony_ci * TODO: use and re-use draw states 2488bf215546Sopenharmony_ci * we have to disable draw states individually to preserve 2489bf215546Sopenharmony_ci * input attachment states, because a secondary command buffer 2490bf215546Sopenharmony_ci * won't be able to restore them 2491bf215546Sopenharmony_ci */ 2492bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2)); 2493bf215546Sopenharmony_ci for (uint32_t i = 0; i < TU_DRAW_STATE_COUNT; i++) { 2494bf215546Sopenharmony_ci if (i == TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM || 2495bf215546Sopenharmony_ci i == TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM) 2496bf215546Sopenharmony_ci continue; 2497bf215546Sopenharmony_ci tu_cs_emit(cs, CP_SET_DRAW_STATE__0_GROUP_ID(i) | 2498bf215546Sopenharmony_ci CP_SET_DRAW_STATE__0_DISABLE); 2499bf215546Sopenharmony_ci tu_cs_emit_qw(cs, 0); 2500bf215546Sopenharmony_ci } 2501bf215546Sopenharmony_ci cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE; 2502bf215546Sopenharmony_ci 2503bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2); 2504bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) | 2505bf215546Sopenharmony_ci A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) | 2506bf215546Sopenharmony_ci 0xfc000000); 2507bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count)); 2508bf215546Sopenharmony_ci 2509bf215546Sopenharmony_ci r3d_common(cmd, cs, false, clear_rts, false, cmd->state.subpass->samples); 2510bf215546Sopenharmony_ci 2511bf215546Sopenharmony_ci /* Disable sample counting in order to not affect occlusion query. */ 2512bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = true)); 2513bf215546Sopenharmony_ci 2514bf215546Sopenharmony_ci if (cmd->state.prim_generated_query_running_before_rp) { 2515bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, STOP_PRIMITIVE_CTRS); 2516bf215546Sopenharmony_ci } 2517bf215546Sopenharmony_ci 2518bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 2519bf215546Sopenharmony_ci A6XX_SP_FS_RENDER_COMPONENTS(.dword = clear_components)); 2520bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 2521bf215546Sopenharmony_ci A6XX_RB_RENDER_COMPONENTS(.dword = clear_components)); 2522bf215546Sopenharmony_ci 2523bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 2524bf215546Sopenharmony_ci A6XX_RB_FS_OUTPUT_CNTL0(), 2525bf215546Sopenharmony_ci A6XX_RB_FS_OUTPUT_CNTL1(.mrt = mrt_count)); 2526bf215546Sopenharmony_ci 2527bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL()); 2528bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.independent_blend = 1, .sample_mask = 0xffff)); 2529bf215546Sopenharmony_ci for (uint32_t i = 0; i < mrt_count; i++) { 2530bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(i, 2531bf215546Sopenharmony_ci .component_enable = COND(clear_rts & (1 << i), 0xf))); 2532bf215546Sopenharmony_ci } 2533bf215546Sopenharmony_ci 2534bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0)); 2535bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0)); 2536bf215546Sopenharmony_ci 2537bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL()); 2538bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL( 2539bf215546Sopenharmony_ci .z_test_enable = z_clear, 2540bf215546Sopenharmony_ci .z_write_enable = z_clear, 2541bf215546Sopenharmony_ci .zfunc = FUNC_ALWAYS)); 2542bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL()); 2543bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL( 2544bf215546Sopenharmony_ci .stencil_enable = s_clear, 2545bf215546Sopenharmony_ci .func = FUNC_ALWAYS, 2546bf215546Sopenharmony_ci .zpass = STENCIL_REPLACE)); 2547bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff)); 2548bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff)); 2549bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val)); 2550bf215546Sopenharmony_ci 2551bf215546Sopenharmony_ci unsigned num_rts = util_bitcount(clear_rts); 2552bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_rts); 2553bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) | 2554bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) | 2555bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) | 2556bf215546Sopenharmony_ci CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) | 2557bf215546Sopenharmony_ci CP_LOAD_STATE6_0_NUM_UNIT(num_rts)); 2558bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0)); 2559bf215546Sopenharmony_ci tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0)); 2560bf215546Sopenharmony_ci u_foreach_bit(b, clear_rts) 2561bf215546Sopenharmony_ci tu_cs_emit_array(cs, clear_value[b], 4); 2562bf215546Sopenharmony_ci 2563bf215546Sopenharmony_ci for (uint32_t i = 0; i < rect_count; i++) { 2564bf215546Sopenharmony_ci /* This should be true because of this valid usage for 2565bf215546Sopenharmony_ci * vkCmdClearAttachments: 2566bf215546Sopenharmony_ci * 2567bf215546Sopenharmony_ci * "If the render pass instance this is recorded in uses multiview, 2568bf215546Sopenharmony_ci * then baseArrayLayer must be zero and layerCount must be one" 2569bf215546Sopenharmony_ci */ 2570bf215546Sopenharmony_ci assert(!subpass->multiview_mask || rects[i].baseArrayLayer == 0); 2571bf215546Sopenharmony_ci 2572bf215546Sopenharmony_ci /* a630 doesn't support multiview masks, which means that we can't use 2573bf215546Sopenharmony_ci * the normal multiview path without potentially recompiling a shader 2574bf215546Sopenharmony_ci * on-demand or using a more complicated variant that takes the mask as 2575bf215546Sopenharmony_ci * a const. Just use the layered path instead, since it shouldn't be 2576bf215546Sopenharmony_ci * much worse. 2577bf215546Sopenharmony_ci */ 2578bf215546Sopenharmony_ci for_each_layer(layer, subpass->multiview_mask, rects[i].layerCount) { 2579bf215546Sopenharmony_ci r3d_coords_raw(cs, (float[]) { 2580bf215546Sopenharmony_ci rects[i].rect.offset.x, rects[i].rect.offset.y, 2581bf215546Sopenharmony_ci z_clear_val, uif(rects[i].baseArrayLayer + layer), 2582bf215546Sopenharmony_ci rects[i].rect.offset.x + rects[i].rect.extent.width, 2583bf215546Sopenharmony_ci rects[i].rect.offset.y + rects[i].rect.extent.height, 2584bf215546Sopenharmony_ci z_clear_val, 1.0f, 2585bf215546Sopenharmony_ci }); 2586bf215546Sopenharmony_ci r3d_run_vis(cmd, cs); 2587bf215546Sopenharmony_ci } 2588bf215546Sopenharmony_ci } 2589bf215546Sopenharmony_ci 2590bf215546Sopenharmony_ci /* Re-enable sample counting. */ 2591bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_SAMPLE_COUNT_CONTROL(.disable = false)); 2592bf215546Sopenharmony_ci 2593bf215546Sopenharmony_ci if (cmd->state.prim_generated_query_running_before_rp) { 2594bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, START_PRIMITIVE_CTRS); 2595bf215546Sopenharmony_ci } 2596bf215546Sopenharmony_ci 2597bf215546Sopenharmony_ci trace_end_sysmem_clear_all(&cmd->trace, 2598bf215546Sopenharmony_ci cs, mrt_count, rect_count); 2599bf215546Sopenharmony_ci} 2600bf215546Sopenharmony_ci 2601bf215546Sopenharmony_cistatic void 2602bf215546Sopenharmony_cipack_gmem_clear_value(const VkClearValue *val, enum pipe_format format, uint32_t clear_value[4]) 2603bf215546Sopenharmony_ci{ 2604bf215546Sopenharmony_ci switch (format) { 2605bf215546Sopenharmony_ci case PIPE_FORMAT_Z24X8_UNORM: 2606bf215546Sopenharmony_ci case PIPE_FORMAT_Z24_UNORM_S8_UINT: 2607bf215546Sopenharmony_ci clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) | 2608bf215546Sopenharmony_ci val->depthStencil.stencil << 24; 2609bf215546Sopenharmony_ci return; 2610bf215546Sopenharmony_ci case PIPE_FORMAT_Z16_UNORM: 2611bf215546Sopenharmony_ci clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16); 2612bf215546Sopenharmony_ci return; 2613bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT: 2614bf215546Sopenharmony_ci clear_value[0] = fui(val->depthStencil.depth); 2615bf215546Sopenharmony_ci return; 2616bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT: 2617bf215546Sopenharmony_ci clear_value[0] = val->depthStencil.stencil; 2618bf215546Sopenharmony_ci return; 2619bf215546Sopenharmony_ci default: 2620bf215546Sopenharmony_ci break; 2621bf215546Sopenharmony_ci } 2622bf215546Sopenharmony_ci 2623bf215546Sopenharmony_ci float tmp[4]; 2624bf215546Sopenharmony_ci memcpy(tmp, val->color.float32, 4 * sizeof(float)); 2625bf215546Sopenharmony_ci if (util_format_is_srgb(format)) { 2626bf215546Sopenharmony_ci for (int i = 0; i < 3; i++) 2627bf215546Sopenharmony_ci tmp[i] = util_format_linear_to_srgb_float(tmp[i]); 2628bf215546Sopenharmony_ci } 2629bf215546Sopenharmony_ci 2630bf215546Sopenharmony_ci#define PACK_F(type) util_format_##type##_pack_rgba_float \ 2631bf215546Sopenharmony_ci ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1) 2632bf215546Sopenharmony_ci switch (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) { 2633bf215546Sopenharmony_ci case 4: 2634bf215546Sopenharmony_ci PACK_F(r4g4b4a4_unorm); 2635bf215546Sopenharmony_ci break; 2636bf215546Sopenharmony_ci case 5: 2637bf215546Sopenharmony_ci if (util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6) 2638bf215546Sopenharmony_ci PACK_F(r5g6b5_unorm); 2639bf215546Sopenharmony_ci else 2640bf215546Sopenharmony_ci PACK_F(r5g5b5a1_unorm); 2641bf215546Sopenharmony_ci break; 2642bf215546Sopenharmony_ci case 8: 2643bf215546Sopenharmony_ci if (util_format_is_snorm(format)) 2644bf215546Sopenharmony_ci PACK_F(r8g8b8a8_snorm); 2645bf215546Sopenharmony_ci else if (util_format_is_unorm(format)) 2646bf215546Sopenharmony_ci PACK_F(r8g8b8a8_unorm); 2647bf215546Sopenharmony_ci else 2648bf215546Sopenharmony_ci pack_int8(clear_value, val->color.uint32); 2649bf215546Sopenharmony_ci break; 2650bf215546Sopenharmony_ci case 10: 2651bf215546Sopenharmony_ci if (util_format_is_pure_integer(format)) 2652bf215546Sopenharmony_ci pack_int10_2(clear_value, val->color.uint32); 2653bf215546Sopenharmony_ci else 2654bf215546Sopenharmony_ci PACK_F(r10g10b10a2_unorm); 2655bf215546Sopenharmony_ci break; 2656bf215546Sopenharmony_ci case 11: 2657bf215546Sopenharmony_ci clear_value[0] = float3_to_r11g11b10f(val->color.float32); 2658bf215546Sopenharmony_ci break; 2659bf215546Sopenharmony_ci case 16: 2660bf215546Sopenharmony_ci if (util_format_is_snorm(format)) 2661bf215546Sopenharmony_ci PACK_F(r16g16b16a16_snorm); 2662bf215546Sopenharmony_ci else if (util_format_is_unorm(format)) 2663bf215546Sopenharmony_ci PACK_F(r16g16b16a16_unorm); 2664bf215546Sopenharmony_ci else if (util_format_is_float(format)) 2665bf215546Sopenharmony_ci PACK_F(r16g16b16a16_float); 2666bf215546Sopenharmony_ci else 2667bf215546Sopenharmony_ci pack_int16(clear_value, val->color.uint32); 2668bf215546Sopenharmony_ci break; 2669bf215546Sopenharmony_ci case 32: 2670bf215546Sopenharmony_ci memcpy(clear_value, val->color.float32, 4 * sizeof(float)); 2671bf215546Sopenharmony_ci break; 2672bf215546Sopenharmony_ci default: 2673bf215546Sopenharmony_ci unreachable("unexpected channel size"); 2674bf215546Sopenharmony_ci } 2675bf215546Sopenharmony_ci#undef PACK_F 2676bf215546Sopenharmony_ci} 2677bf215546Sopenharmony_ci 2678bf215546Sopenharmony_cistatic void 2679bf215546Sopenharmony_ciclear_gmem_attachment(struct tu_cmd_buffer *cmd, 2680bf215546Sopenharmony_ci struct tu_cs *cs, 2681bf215546Sopenharmony_ci enum pipe_format format, 2682bf215546Sopenharmony_ci uint8_t clear_mask, 2683bf215546Sopenharmony_ci uint32_t gmem_offset, 2684bf215546Sopenharmony_ci const VkClearValue *value) 2685bf215546Sopenharmony_ci{ 2686bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1); 2687bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format))); 2688bf215546Sopenharmony_ci 2689bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1, .clear_mask = clear_mask)); 2690bf215546Sopenharmony_ci 2691bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1); 2692bf215546Sopenharmony_ci tu_cs_emit(cs, gmem_offset); 2693bf215546Sopenharmony_ci 2694bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1); 2695bf215546Sopenharmony_ci tu_cs_emit(cs, 0); 2696bf215546Sopenharmony_ci 2697bf215546Sopenharmony_ci uint32_t clear_vals[4] = {}; 2698bf215546Sopenharmony_ci pack_gmem_clear_value(value, format, clear_vals); 2699bf215546Sopenharmony_ci 2700bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4); 2701bf215546Sopenharmony_ci tu_cs_emit_array(cs, clear_vals, 4); 2702bf215546Sopenharmony_ci 2703bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, BLIT); 2704bf215546Sopenharmony_ci} 2705bf215546Sopenharmony_ci 2706bf215546Sopenharmony_cistatic void 2707bf215546Sopenharmony_citu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd, 2708bf215546Sopenharmony_ci struct tu_cs *cs, 2709bf215546Sopenharmony_ci uint32_t attachment, 2710bf215546Sopenharmony_ci VkImageAspectFlags mask, 2711bf215546Sopenharmony_ci const VkClearValue *value) 2712bf215546Sopenharmony_ci{ 2713bf215546Sopenharmony_ci const struct tu_render_pass_attachment *att = 2714bf215546Sopenharmony_ci &cmd->state.pass->attachments[attachment]; 2715bf215546Sopenharmony_ci 2716bf215546Sopenharmony_ci trace_start_gmem_clear(&cmd->trace, cs); 2717bf215546Sopenharmony_ci 2718bf215546Sopenharmony_ci enum pipe_format format = tu_vk_format_to_pipe_format(att->format); 2719bf215546Sopenharmony_ci if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 2720bf215546Sopenharmony_ci if (mask & VK_IMAGE_ASPECT_DEPTH_BIT) 2721bf215546Sopenharmony_ci clear_gmem_attachment(cmd, cs, PIPE_FORMAT_Z32_FLOAT, 0xf, tu_attachment_gmem_offset(cmd, att), value); 2722bf215546Sopenharmony_ci if (mask & VK_IMAGE_ASPECT_STENCIL_BIT) 2723bf215546Sopenharmony_ci clear_gmem_attachment(cmd, cs, PIPE_FORMAT_S8_UINT, 0xf, tu_attachment_gmem_offset_stencil(cmd, att), value); 2724bf215546Sopenharmony_ci return; 2725bf215546Sopenharmony_ci } 2726bf215546Sopenharmony_ci 2727bf215546Sopenharmony_ci clear_gmem_attachment(cmd, cs, format, aspect_write_mask(format, mask), 2728bf215546Sopenharmony_ci tu_attachment_gmem_offset(cmd, att), value); 2729bf215546Sopenharmony_ci 2730bf215546Sopenharmony_ci trace_end_gmem_clear(&cmd->trace, cs, att->format, att->samples); 2731bf215546Sopenharmony_ci} 2732bf215546Sopenharmony_ci 2733bf215546Sopenharmony_cistatic void 2734bf215546Sopenharmony_citu_clear_gmem_attachments(struct tu_cmd_buffer *cmd, 2735bf215546Sopenharmony_ci uint32_t attachment_count, 2736bf215546Sopenharmony_ci const VkClearAttachment *attachments, 2737bf215546Sopenharmony_ci uint32_t rect_count, 2738bf215546Sopenharmony_ci const VkClearRect *rects) 2739bf215546Sopenharmony_ci{ 2740bf215546Sopenharmony_ci const struct tu_subpass *subpass = cmd->state.subpass; 2741bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->draw_cs; 2742bf215546Sopenharmony_ci 2743bf215546Sopenharmony_ci if (rect_count > 1) 2744bf215546Sopenharmony_ci perf_debug(cmd->device, "TODO: Swap tu_clear_gmem_attachments() loop for smaller command stream"); 2745bf215546Sopenharmony_ci 2746bf215546Sopenharmony_ci for (unsigned i = 0; i < rect_count; i++) { 2747bf215546Sopenharmony_ci unsigned x1 = rects[i].rect.offset.x; 2748bf215546Sopenharmony_ci unsigned y1 = rects[i].rect.offset.y; 2749bf215546Sopenharmony_ci unsigned x2 = x1 + rects[i].rect.extent.width - 1; 2750bf215546Sopenharmony_ci unsigned y2 = y1 + rects[i].rect.extent.height - 1; 2751bf215546Sopenharmony_ci 2752bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2); 2753bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1)); 2754bf215546Sopenharmony_ci tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2)); 2755bf215546Sopenharmony_ci 2756bf215546Sopenharmony_ci for (unsigned j = 0; j < attachment_count; j++) { 2757bf215546Sopenharmony_ci uint32_t a; 2758bf215546Sopenharmony_ci if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) 2759bf215546Sopenharmony_ci a = subpass->color_attachments[attachments[j].colorAttachment].attachment; 2760bf215546Sopenharmony_ci else 2761bf215546Sopenharmony_ci a = subpass->depth_stencil_attachment.attachment; 2762bf215546Sopenharmony_ci 2763bf215546Sopenharmony_ci if (a == VK_ATTACHMENT_UNUSED) 2764bf215546Sopenharmony_ci continue; 2765bf215546Sopenharmony_ci 2766bf215546Sopenharmony_ci tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask, 2767bf215546Sopenharmony_ci &attachments[j].clearValue); 2768bf215546Sopenharmony_ci } 2769bf215546Sopenharmony_ci } 2770bf215546Sopenharmony_ci} 2771bf215546Sopenharmony_ci 2772bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL 2773bf215546Sopenharmony_citu_CmdClearAttachments(VkCommandBuffer commandBuffer, 2774bf215546Sopenharmony_ci uint32_t attachmentCount, 2775bf215546Sopenharmony_ci const VkClearAttachment *pAttachments, 2776bf215546Sopenharmony_ci uint32_t rectCount, 2777bf215546Sopenharmony_ci const VkClearRect *pRects) 2778bf215546Sopenharmony_ci{ 2779bf215546Sopenharmony_ci TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer); 2780bf215546Sopenharmony_ci struct tu_cs *cs = &cmd->draw_cs; 2781bf215546Sopenharmony_ci 2782bf215546Sopenharmony_ci /* sysmem path behaves like a draw, note we don't have a way of using different 2783bf215546Sopenharmony_ci * flushes for sysmem/gmem, so this needs to be outside of the cond_exec 2784bf215546Sopenharmony_ci */ 2785bf215546Sopenharmony_ci tu_emit_cache_flush_renderpass(cmd, cs); 2786bf215546Sopenharmony_ci 2787bf215546Sopenharmony_ci for (uint32_t j = 0; j < attachmentCount; j++) { 2788bf215546Sopenharmony_ci if ((pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) == 0) 2789bf215546Sopenharmony_ci continue; 2790bf215546Sopenharmony_ci 2791bf215546Sopenharmony_ci tu_lrz_disable_during_renderpass(cmd); 2792bf215546Sopenharmony_ci } 2793bf215546Sopenharmony_ci 2794bf215546Sopenharmony_ci /* vkCmdClearAttachments is supposed to respect the predicate if active. The 2795bf215546Sopenharmony_ci * easiest way to do this is to always use the 3d path, which always works 2796bf215546Sopenharmony_ci * even with GMEM because it's just a simple draw using the existing 2797bf215546Sopenharmony_ci * attachment state. 2798bf215546Sopenharmony_ci * 2799bf215546Sopenharmony_ci * Similarly, we also use the 3D path when in a secondary command buffer that 2800bf215546Sopenharmony_ci * doesn't know the GMEM layout that will be chosen by the primary. 2801bf215546Sopenharmony_ci */ 2802bf215546Sopenharmony_ci if (cmd->state.predication_active || cmd->state.gmem_layout == TU_GMEM_LAYOUT_COUNT) { 2803bf215546Sopenharmony_ci tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); 2804bf215546Sopenharmony_ci return; 2805bf215546Sopenharmony_ci } 2806bf215546Sopenharmony_ci 2807bf215546Sopenharmony_ci /* If we could skip tile load/stores based on any draws intersecting them at 2808bf215546Sopenharmony_ci * binning time, then emit the clear as a 3D draw so that it contributes to 2809bf215546Sopenharmony_ci * that visibility. 2810bf215546Sopenharmony_ci */ 2811bf215546Sopenharmony_ci const struct tu_subpass *subpass = cmd->state.subpass; 2812bf215546Sopenharmony_ci for (uint32_t i = 0; i < attachmentCount; i++) { 2813bf215546Sopenharmony_ci uint32_t a; 2814bf215546Sopenharmony_ci if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) { 2815bf215546Sopenharmony_ci uint32_t c = pAttachments[i].colorAttachment; 2816bf215546Sopenharmony_ci a = subpass->color_attachments[c].attachment; 2817bf215546Sopenharmony_ci } else { 2818bf215546Sopenharmony_ci a = subpass->depth_stencil_attachment.attachment; 2819bf215546Sopenharmony_ci } 2820bf215546Sopenharmony_ci if (a != VK_ATTACHMENT_UNUSED) { 2821bf215546Sopenharmony_ci const struct tu_render_pass_attachment *att = &cmd->state.pass->attachments[a]; 2822bf215546Sopenharmony_ci if (att->cond_load_allowed || att->cond_store_allowed) { 2823bf215546Sopenharmony_ci tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); 2824bf215546Sopenharmony_ci return; 2825bf215546Sopenharmony_ci } 2826bf215546Sopenharmony_ci } 2827bf215546Sopenharmony_ci } 2828bf215546Sopenharmony_ci 2829bf215546Sopenharmony_ci /* Otherwise, emit 2D blits for gmem rendering. */ 2830bf215546Sopenharmony_ci tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM); 2831bf215546Sopenharmony_ci tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); 2832bf215546Sopenharmony_ci tu_cond_exec_end(cs); 2833bf215546Sopenharmony_ci 2834bf215546Sopenharmony_ci tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM); 2835bf215546Sopenharmony_ci tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects); 2836bf215546Sopenharmony_ci tu_cond_exec_end(cs); 2837bf215546Sopenharmony_ci} 2838bf215546Sopenharmony_ci 2839bf215546Sopenharmony_cistatic void 2840bf215546Sopenharmony_ciclear_sysmem_attachment(struct tu_cmd_buffer *cmd, 2841bf215546Sopenharmony_ci struct tu_cs *cs, 2842bf215546Sopenharmony_ci VkFormat vk_format, 2843bf215546Sopenharmony_ci VkImageAspectFlags clear_mask, 2844bf215546Sopenharmony_ci const VkClearValue *value, 2845bf215546Sopenharmony_ci uint32_t a, 2846bf215546Sopenharmony_ci bool separate_ds) 2847bf215546Sopenharmony_ci{ 2848bf215546Sopenharmony_ci enum pipe_format format = tu_vk_format_to_pipe_format(vk_format); 2849bf215546Sopenharmony_ci const struct tu_framebuffer *fb = cmd->state.framebuffer; 2850bf215546Sopenharmony_ci const struct tu_image_view *iview = cmd->state.attachments[a]; 2851bf215546Sopenharmony_ci const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views; 2852bf215546Sopenharmony_ci const struct blit_ops *ops = &r2d_ops; 2853bf215546Sopenharmony_ci if (cmd->state.pass->attachments[a].samples > 1) 2854bf215546Sopenharmony_ci ops = &r3d_ops; 2855bf215546Sopenharmony_ci 2856bf215546Sopenharmony_ci trace_start_sysmem_clear(&cmd->trace, cs); 2857bf215546Sopenharmony_ci 2858bf215546Sopenharmony_ci ops->setup(cmd, cs, format, format, clear_mask, 0, true, iview->view.ubwc_enabled, 2859bf215546Sopenharmony_ci cmd->state.pass->attachments[a].samples); 2860bf215546Sopenharmony_ci ops->coords(cs, &cmd->state.render_area.offset, NULL, 2861bf215546Sopenharmony_ci &cmd->state.render_area.extent); 2862bf215546Sopenharmony_ci ops->clear_value(cs, format, value); 2863bf215546Sopenharmony_ci 2864bf215546Sopenharmony_ci for_each_layer(i, clear_views, fb->layers) { 2865bf215546Sopenharmony_ci if (separate_ds) { 2866bf215546Sopenharmony_ci if (vk_format == VK_FORMAT_D32_SFLOAT) { 2867bf215546Sopenharmony_ci ops->dst_depth(cs, iview, i); 2868bf215546Sopenharmony_ci } else { 2869bf215546Sopenharmony_ci ops->dst_stencil(cs, iview, i); 2870bf215546Sopenharmony_ci } 2871bf215546Sopenharmony_ci } else { 2872bf215546Sopenharmony_ci ops->dst(cs, &iview->view, i, format); 2873bf215546Sopenharmony_ci } 2874bf215546Sopenharmony_ci ops->run(cmd, cs); 2875bf215546Sopenharmony_ci } 2876bf215546Sopenharmony_ci 2877bf215546Sopenharmony_ci ops->teardown(cmd, cs); 2878bf215546Sopenharmony_ci 2879bf215546Sopenharmony_ci trace_end_sysmem_clear(&cmd->trace, cs, 2880bf215546Sopenharmony_ci vk_format, ops == &r3d_ops, 2881bf215546Sopenharmony_ci cmd->state.pass->attachments[a].samples); 2882bf215546Sopenharmony_ci} 2883bf215546Sopenharmony_ci 2884bf215546Sopenharmony_civoid 2885bf215546Sopenharmony_citu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd, 2886bf215546Sopenharmony_ci struct tu_cs *cs, 2887bf215546Sopenharmony_ci uint32_t a, 2888bf215546Sopenharmony_ci const VkClearValue *value) 2889bf215546Sopenharmony_ci{ 2890bf215546Sopenharmony_ci const struct tu_render_pass_attachment *attachment = 2891bf215546Sopenharmony_ci &cmd->state.pass->attachments[a]; 2892bf215546Sopenharmony_ci 2893bf215546Sopenharmony_ci if (!attachment->clear_mask) 2894bf215546Sopenharmony_ci return; 2895bf215546Sopenharmony_ci 2896bf215546Sopenharmony_ci if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 2897bf215546Sopenharmony_ci if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) { 2898bf215546Sopenharmony_ci clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT, 2899bf215546Sopenharmony_ci value, a, true); 2900bf215546Sopenharmony_ci } 2901bf215546Sopenharmony_ci if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) { 2902bf215546Sopenharmony_ci clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT, 2903bf215546Sopenharmony_ci value, a, true); 2904bf215546Sopenharmony_ci } 2905bf215546Sopenharmony_ci } else { 2906bf215546Sopenharmony_ci clear_sysmem_attachment(cmd, cs, attachment->format, attachment->clear_mask, 2907bf215546Sopenharmony_ci value, a, false); 2908bf215546Sopenharmony_ci } 2909bf215546Sopenharmony_ci 2910bf215546Sopenharmony_ci /* The spec doesn't explicitly say, but presumably the initial renderpass 2911bf215546Sopenharmony_ci * clear is considered part of the renderpass, and therefore barriers 2912bf215546Sopenharmony_ci * aren't required inside the subpass/renderpass. Therefore we need to 2913bf215546Sopenharmony_ci * flush CCU color into CCU depth here, just like with 2914bf215546Sopenharmony_ci * vkCmdClearAttachments(). Note that because this only happens at the 2915bf215546Sopenharmony_ci * beginning of a renderpass, and renderpass writes are considered 2916bf215546Sopenharmony_ci * "incoherent", we shouldn't have to worry about syncing depth into color 2917bf215546Sopenharmony_ci * beforehand as depth should already be flushed. 2918bf215546Sopenharmony_ci */ 2919bf215546Sopenharmony_ci if (vk_format_is_depth_or_stencil(attachment->format)) { 2920bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); 2921bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_DEPTH_TS); 2922bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH); 2923bf215546Sopenharmony_ci } else { 2924bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); 2925bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR); 2926bf215546Sopenharmony_ci } 2927bf215546Sopenharmony_ci 2928bf215546Sopenharmony_ci if (cmd->device->physical_device->info->a6xx.has_ccu_flush_bug) 2929bf215546Sopenharmony_ci tu_cs_emit_wfi(cs); 2930bf215546Sopenharmony_ci} 2931bf215546Sopenharmony_ci 2932bf215546Sopenharmony_civoid 2933bf215546Sopenharmony_citu_clear_gmem_attachment(struct tu_cmd_buffer *cmd, 2934bf215546Sopenharmony_ci struct tu_cs *cs, 2935bf215546Sopenharmony_ci uint32_t a, 2936bf215546Sopenharmony_ci const VkClearValue *value) 2937bf215546Sopenharmony_ci{ 2938bf215546Sopenharmony_ci const struct tu_render_pass_attachment *attachment = 2939bf215546Sopenharmony_ci &cmd->state.pass->attachments[a]; 2940bf215546Sopenharmony_ci 2941bf215546Sopenharmony_ci if (!attachment->clear_mask) 2942bf215546Sopenharmony_ci return; 2943bf215546Sopenharmony_ci 2944bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples))); 2945bf215546Sopenharmony_ci 2946bf215546Sopenharmony_ci tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask, value); 2947bf215546Sopenharmony_ci} 2948bf215546Sopenharmony_ci 2949bf215546Sopenharmony_cistatic void 2950bf215546Sopenharmony_citu_emit_blit(struct tu_cmd_buffer *cmd, 2951bf215546Sopenharmony_ci struct tu_cs *cs, 2952bf215546Sopenharmony_ci const struct tu_image_view *iview, 2953bf215546Sopenharmony_ci const struct tu_render_pass_attachment *attachment, 2954bf215546Sopenharmony_ci bool resolve, 2955bf215546Sopenharmony_ci bool separate_stencil) 2956bf215546Sopenharmony_ci{ 2957bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 2958bf215546Sopenharmony_ci A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples))); 2959bf215546Sopenharmony_ci 2960bf215546Sopenharmony_ci tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO( 2961bf215546Sopenharmony_ci .unk0 = !resolve, 2962bf215546Sopenharmony_ci .gmem = !resolve, 2963bf215546Sopenharmony_ci .sample_0 = vk_format_is_int(attachment->format) || 2964bf215546Sopenharmony_ci vk_format_is_depth_or_stencil(attachment->format))); 2965bf215546Sopenharmony_ci 2966bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4); 2967bf215546Sopenharmony_ci if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 2968bf215546Sopenharmony_ci if (!separate_stencil) { 2969bf215546Sopenharmony_ci tu_cs_emit(cs, tu_image_view_depth(iview, RB_BLIT_DST_INFO)); 2970bf215546Sopenharmony_ci tu_cs_emit_qw(cs, iview->depth_base_addr); 2971bf215546Sopenharmony_ci tu_cs_emit(cs, iview->depth_PITCH); 2972bf215546Sopenharmony_ci 2973bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3); 2974bf215546Sopenharmony_ci tu_cs_image_flag_ref(cs, &iview->view, 0); 2975bf215546Sopenharmony_ci } else { 2976bf215546Sopenharmony_ci tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS); 2977bf215546Sopenharmony_ci tu_cs_emit_qw(cs, iview->stencil_base_addr); 2978bf215546Sopenharmony_ci tu_cs_emit(cs, iview->stencil_PITCH); 2979bf215546Sopenharmony_ci } 2980bf215546Sopenharmony_ci } else { 2981bf215546Sopenharmony_ci tu_cs_emit(cs, iview->view.RB_BLIT_DST_INFO); 2982bf215546Sopenharmony_ci tu_cs_image_ref_2d(cs, &iview->view, 0, false); 2983bf215546Sopenharmony_ci 2984bf215546Sopenharmony_ci tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3); 2985bf215546Sopenharmony_ci tu_cs_image_flag_ref(cs, &iview->view, 0); 2986bf215546Sopenharmony_ci } 2987bf215546Sopenharmony_ci 2988bf215546Sopenharmony_ci if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && separate_stencil) { 2989bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 2990bf215546Sopenharmony_ci A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset_stencil(cmd, attachment))); 2991bf215546Sopenharmony_ci } else { 2992bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 2993bf215546Sopenharmony_ci A6XX_RB_BLIT_BASE_GMEM(tu_attachment_gmem_offset(cmd, attachment))); 2994bf215546Sopenharmony_ci } 2995bf215546Sopenharmony_ci 2996bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, BLIT); 2997bf215546Sopenharmony_ci} 2998bf215546Sopenharmony_ci 2999bf215546Sopenharmony_cistatic bool 3000bf215546Sopenharmony_ciblit_can_resolve(VkFormat format) 3001bf215546Sopenharmony_ci{ 3002bf215546Sopenharmony_ci const struct util_format_description *desc = vk_format_description(format); 3003bf215546Sopenharmony_ci 3004bf215546Sopenharmony_ci /* blit event can only do resolve for simple cases: 3005bf215546Sopenharmony_ci * averaging samples as unsigned integers or choosing only one sample 3006bf215546Sopenharmony_ci */ 3007bf215546Sopenharmony_ci if (vk_format_is_snorm(format) || vk_format_is_srgb(format)) 3008bf215546Sopenharmony_ci return false; 3009bf215546Sopenharmony_ci 3010bf215546Sopenharmony_ci /* can't do formats with larger channel sizes 3011bf215546Sopenharmony_ci * note: this includes all float formats 3012bf215546Sopenharmony_ci * note2: single channel integer formats seem OK 3013bf215546Sopenharmony_ci */ 3014bf215546Sopenharmony_ci if (desc->channel[0].size > 10) 3015bf215546Sopenharmony_ci return false; 3016bf215546Sopenharmony_ci 3017bf215546Sopenharmony_ci switch (format) { 3018bf215546Sopenharmony_ci /* for unknown reasons blit event can't msaa resolve these formats when tiled 3019bf215546Sopenharmony_ci * likely related to these formats having different layout from other cpp=2 formats 3020bf215546Sopenharmony_ci */ 3021bf215546Sopenharmony_ci case VK_FORMAT_R8G8_UNORM: 3022bf215546Sopenharmony_ci case VK_FORMAT_R8G8_UINT: 3023bf215546Sopenharmony_ci case VK_FORMAT_R8G8_SINT: 3024bf215546Sopenharmony_ci /* TODO: this one should be able to work? */ 3025bf215546Sopenharmony_ci case VK_FORMAT_D24_UNORM_S8_UINT: 3026bf215546Sopenharmony_ci return false; 3027bf215546Sopenharmony_ci default: 3028bf215546Sopenharmony_ci break; 3029bf215546Sopenharmony_ci } 3030bf215546Sopenharmony_ci 3031bf215546Sopenharmony_ci return true; 3032bf215546Sopenharmony_ci} 3033bf215546Sopenharmony_ci 3034bf215546Sopenharmony_cistatic void 3035bf215546Sopenharmony_citu_begin_load_store_cond_exec(struct tu_cmd_buffer *cmd, 3036bf215546Sopenharmony_ci struct tu_cs *cs, bool load) 3037bf215546Sopenharmony_ci{ 3038bf215546Sopenharmony_ci tu_cond_exec_start(cs, CP_COND_REG_EXEC_0_MODE(PRED_TEST)); 3039bf215546Sopenharmony_ci 3040bf215546Sopenharmony_ci if (!unlikely(cmd->device->physical_device->instance->debug_flags & 3041bf215546Sopenharmony_ci TU_DEBUG_LOG_SKIP_GMEM_OPS)) 3042bf215546Sopenharmony_ci return; 3043bf215546Sopenharmony_ci 3044bf215546Sopenharmony_ci uint64_t result_iova; 3045bf215546Sopenharmony_ci if (load) 3046bf215546Sopenharmony_ci result_iova = global_iova(cmd, dbg_gmem_taken_loads); 3047bf215546Sopenharmony_ci else 3048bf215546Sopenharmony_ci result_iova = global_iova(cmd, dbg_gmem_taken_stores); 3049bf215546Sopenharmony_ci 3050bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7); 3051bf215546Sopenharmony_ci tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B); 3052bf215546Sopenharmony_ci tu_cs_emit_qw(cs, result_iova); 3053bf215546Sopenharmony_ci tu_cs_emit_qw(cs, result_iova); 3054bf215546Sopenharmony_ci tu_cs_emit_qw(cs, global_iova(cmd, dbg_one)); 3055bf215546Sopenharmony_ci} 3056bf215546Sopenharmony_ci 3057bf215546Sopenharmony_cistatic void 3058bf215546Sopenharmony_citu_end_load_store_cond_exec(struct tu_cmd_buffer *cmd, 3059bf215546Sopenharmony_ci struct tu_cs *cs, bool load) 3060bf215546Sopenharmony_ci{ 3061bf215546Sopenharmony_ci tu_cond_exec_end(cs); 3062bf215546Sopenharmony_ci 3063bf215546Sopenharmony_ci if (!unlikely(cmd->device->physical_device->instance->debug_flags & 3064bf215546Sopenharmony_ci TU_DEBUG_LOG_SKIP_GMEM_OPS)) 3065bf215546Sopenharmony_ci return; 3066bf215546Sopenharmony_ci 3067bf215546Sopenharmony_ci uint64_t result_iova; 3068bf215546Sopenharmony_ci if (load) 3069bf215546Sopenharmony_ci result_iova = global_iova(cmd, dbg_gmem_total_loads); 3070bf215546Sopenharmony_ci else 3071bf215546Sopenharmony_ci result_iova = global_iova(cmd, dbg_gmem_total_stores); 3072bf215546Sopenharmony_ci 3073bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_MEM_TO_MEM, 7); 3074bf215546Sopenharmony_ci tu_cs_emit(cs, CP_MEM_TO_MEM_0_NEG_B); 3075bf215546Sopenharmony_ci tu_cs_emit_qw(cs, result_iova); 3076bf215546Sopenharmony_ci tu_cs_emit_qw(cs, result_iova); 3077bf215546Sopenharmony_ci tu_cs_emit_qw(cs, global_iova(cmd, dbg_one)); 3078bf215546Sopenharmony_ci} 3079bf215546Sopenharmony_ci 3080bf215546Sopenharmony_civoid 3081bf215546Sopenharmony_citu_load_gmem_attachment(struct tu_cmd_buffer *cmd, 3082bf215546Sopenharmony_ci struct tu_cs *cs, 3083bf215546Sopenharmony_ci uint32_t a, 3084bf215546Sopenharmony_ci bool cond_exec_allowed, 3085bf215546Sopenharmony_ci bool force_load) 3086bf215546Sopenharmony_ci{ 3087bf215546Sopenharmony_ci const struct tu_image_view *iview = cmd->state.attachments[a]; 3088bf215546Sopenharmony_ci const struct tu_render_pass_attachment *attachment = 3089bf215546Sopenharmony_ci &cmd->state.pass->attachments[a]; 3090bf215546Sopenharmony_ci 3091bf215546Sopenharmony_ci bool load_common = attachment->load || force_load; 3092bf215546Sopenharmony_ci bool load_stencil = 3093bf215546Sopenharmony_ci attachment->load_stencil || 3094bf215546Sopenharmony_ci (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load); 3095bf215546Sopenharmony_ci 3096bf215546Sopenharmony_ci if (!load_common && !load_stencil) 3097bf215546Sopenharmony_ci return; 3098bf215546Sopenharmony_ci 3099bf215546Sopenharmony_ci trace_start_gmem_load(&cmd->trace, cs); 3100bf215546Sopenharmony_ci 3101bf215546Sopenharmony_ci /* If attachment will be cleared by vkCmdClearAttachments - it is likely 3102bf215546Sopenharmony_ci * that it would be partially cleared, and since it is done by 2d blit 3103bf215546Sopenharmony_ci * it doesn't produce geometry, so we have to unconditionally load. 3104bf215546Sopenharmony_ci * 3105bf215546Sopenharmony_ci * To simplify conditions treat partially cleared separate DS as fully 3106bf215546Sopenharmony_ci * cleared and don't emit cond_exec. 3107bf215546Sopenharmony_ci */ 3108bf215546Sopenharmony_ci bool cond_exec = cond_exec_allowed && attachment->cond_load_allowed; 3109bf215546Sopenharmony_ci if (cond_exec) 3110bf215546Sopenharmony_ci tu_begin_load_store_cond_exec(cmd, cs, true); 3111bf215546Sopenharmony_ci 3112bf215546Sopenharmony_ci if (load_common) 3113bf215546Sopenharmony_ci tu_emit_blit(cmd, cs, iview, attachment, false, false); 3114bf215546Sopenharmony_ci 3115bf215546Sopenharmony_ci if (load_stencil) 3116bf215546Sopenharmony_ci tu_emit_blit(cmd, cs, iview, attachment, false, true); 3117bf215546Sopenharmony_ci 3118bf215546Sopenharmony_ci if (cond_exec) 3119bf215546Sopenharmony_ci tu_end_load_store_cond_exec(cmd, cs, true); 3120bf215546Sopenharmony_ci 3121bf215546Sopenharmony_ci trace_end_gmem_load(&cmd->trace, cs, attachment->format, force_load); 3122bf215546Sopenharmony_ci} 3123bf215546Sopenharmony_ci 3124bf215546Sopenharmony_cistatic void 3125bf215546Sopenharmony_cistore_cp_blit(struct tu_cmd_buffer *cmd, 3126bf215546Sopenharmony_ci struct tu_cs *cs, 3127bf215546Sopenharmony_ci const struct tu_image_view *iview, 3128bf215546Sopenharmony_ci uint32_t samples, 3129bf215546Sopenharmony_ci bool separate_stencil, 3130bf215546Sopenharmony_ci enum pipe_format src_format, 3131bf215546Sopenharmony_ci enum pipe_format dst_format, 3132bf215546Sopenharmony_ci uint32_t gmem_offset, 3133bf215546Sopenharmony_ci uint32_t cpp) 3134bf215546Sopenharmony_ci{ 3135bf215546Sopenharmony_ci r2d_setup_common(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, 3136bf215546Sopenharmony_ci iview->view.ubwc_enabled, true); 3137bf215546Sopenharmony_ci 3138bf215546Sopenharmony_ci if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 3139bf215546Sopenharmony_ci if (!separate_stencil) { 3140bf215546Sopenharmony_ci r2d_dst_depth(cs, iview, 0); 3141bf215546Sopenharmony_ci } else { 3142bf215546Sopenharmony_ci r2d_dst_stencil(cs, iview, 0); 3143bf215546Sopenharmony_ci } 3144bf215546Sopenharmony_ci } else { 3145bf215546Sopenharmony_ci r2d_dst(cs, &iview->view, 0, src_format); 3146bf215546Sopenharmony_ci } 3147bf215546Sopenharmony_ci 3148bf215546Sopenharmony_ci enum a6xx_format fmt = tu6_format_texture(src_format, TILE6_2).fmt; 3149bf215546Sopenharmony_ci fixup_src_format(&src_format, dst_format, &fmt); 3150bf215546Sopenharmony_ci 3151bf215546Sopenharmony_ci tu_cs_emit_regs(cs, 3152bf215546Sopenharmony_ci A6XX_SP_PS_2D_SRC_INFO( 3153bf215546Sopenharmony_ci .color_format = fmt, 3154bf215546Sopenharmony_ci .color_swap = WZYX, 3155bf215546Sopenharmony_ci .tile_mode = TILE6_2, 3156bf215546Sopenharmony_ci .srgb = util_format_is_srgb(src_format), 3157bf215546Sopenharmony_ci .samples = tu_msaa_samples(samples), 3158bf215546Sopenharmony_ci .samples_average = !util_format_is_pure_integer(dst_format) && 3159bf215546Sopenharmony_ci !util_format_is_depth_or_stencil(dst_format), 3160bf215546Sopenharmony_ci .unk20 = 1, 3161bf215546Sopenharmony_ci .unk22 = 1), 3162bf215546Sopenharmony_ci /* note: src size does not matter when not scaling */ 3163bf215546Sopenharmony_ci A6XX_SP_PS_2D_SRC_SIZE( .width = 0x3fff, .height = 0x3fff), 3164bf215546Sopenharmony_ci A6XX_SP_PS_2D_SRC(.qword = cmd->device->physical_device->gmem_base + gmem_offset), 3165bf215546Sopenharmony_ci A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.tiling->tile0.width * cpp)); 3166bf215546Sopenharmony_ci 3167bf215546Sopenharmony_ci /* sync GMEM writes with CACHE. */ 3168bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); 3169bf215546Sopenharmony_ci 3170bf215546Sopenharmony_ci /* Wait for CACHE_INVALIDATE to land */ 3171bf215546Sopenharmony_ci tu_cs_emit_wfi(cs); 3172bf215546Sopenharmony_ci 3173bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_BLIT, 1); 3174bf215546Sopenharmony_ci tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE)); 3175bf215546Sopenharmony_ci 3176bf215546Sopenharmony_ci /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to 3177bf215546Sopenharmony_ci * sysmem, and we generally assume that GMEM renderpasses leave their 3178bf215546Sopenharmony_ci * results in sysmem, so we need to flush manually here. 3179bf215546Sopenharmony_ci */ 3180bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); 3181bf215546Sopenharmony_ci} 3182bf215546Sopenharmony_ci 3183bf215546Sopenharmony_cistatic void 3184bf215546Sopenharmony_cistore_3d_blit(struct tu_cmd_buffer *cmd, 3185bf215546Sopenharmony_ci struct tu_cs *cs, 3186bf215546Sopenharmony_ci const struct tu_image_view *iview, 3187bf215546Sopenharmony_ci uint32_t dst_samples, 3188bf215546Sopenharmony_ci bool separate_stencil, 3189bf215546Sopenharmony_ci enum pipe_format src_format, 3190bf215546Sopenharmony_ci enum pipe_format dst_format, 3191bf215546Sopenharmony_ci const VkRect2D *render_area, 3192bf215546Sopenharmony_ci uint32_t gmem_offset, 3193bf215546Sopenharmony_ci uint32_t cpp) 3194bf215546Sopenharmony_ci{ 3195bf215546Sopenharmony_ci /* RB_BIN_CONTROL/GRAS_BIN_CONTROL are normally only set once and they 3196bf215546Sopenharmony_ci * aren't set until we know whether we're HW binning or not, and we want to 3197bf215546Sopenharmony_ci * avoid a dependence on that here to be able to store attachments before 3198bf215546Sopenharmony_ci * the end of the renderpass in the future. Use the scratch space to 3199bf215546Sopenharmony_ci * save/restore them dynamically. 3200bf215546Sopenharmony_ci */ 3201bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_REG_TO_SCRATCH, 1); 3202bf215546Sopenharmony_ci tu_cs_emit(cs, CP_REG_TO_SCRATCH_0_REG(REG_A6XX_RB_BIN_CONTROL) | 3203bf215546Sopenharmony_ci CP_REG_TO_SCRATCH_0_SCRATCH(0) | 3204bf215546Sopenharmony_ci CP_REG_TO_SCRATCH_0_CNT(1 - 1)); 3205bf215546Sopenharmony_ci 3206bf215546Sopenharmony_ci r3d_setup(cmd, cs, src_format, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, 3207bf215546Sopenharmony_ci iview->view.ubwc_enabled, dst_samples); 3208bf215546Sopenharmony_ci 3209bf215546Sopenharmony_ci r3d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); 3210bf215546Sopenharmony_ci 3211bf215546Sopenharmony_ci if (iview->image->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) { 3212bf215546Sopenharmony_ci if (!separate_stencil) { 3213bf215546Sopenharmony_ci r3d_dst_depth(cs, iview, 0); 3214bf215546Sopenharmony_ci } else { 3215bf215546Sopenharmony_ci r3d_dst_stencil(cs, iview, 0); 3216bf215546Sopenharmony_ci } 3217bf215546Sopenharmony_ci } else { 3218bf215546Sopenharmony_ci r3d_dst(cs, &iview->view, 0, src_format); 3219bf215546Sopenharmony_ci } 3220bf215546Sopenharmony_ci 3221bf215546Sopenharmony_ci r3d_src_gmem(cmd, cs, iview, src_format, dst_format, gmem_offset, cpp); 3222bf215546Sopenharmony_ci 3223bf215546Sopenharmony_ci /* sync GMEM writes with CACHE. */ 3224bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE); 3225bf215546Sopenharmony_ci 3226bf215546Sopenharmony_ci /* Wait for CACHE_INVALIDATE to land */ 3227bf215546Sopenharmony_ci tu_cs_emit_wfi(cs); 3228bf215546Sopenharmony_ci 3229bf215546Sopenharmony_ci r3d_run(cmd, cs); 3230bf215546Sopenharmony_ci 3231bf215546Sopenharmony_ci r3d_teardown(cmd, cs); 3232bf215546Sopenharmony_ci 3233bf215546Sopenharmony_ci /* Draws write to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to 3234bf215546Sopenharmony_ci * sysmem, and we generally assume that GMEM renderpasses leave their 3235bf215546Sopenharmony_ci * results in sysmem, so we need to flush manually here. The 3d blit path 3236bf215546Sopenharmony_ci * writes to depth images as a color RT, so there's no need to flush depth. 3237bf215546Sopenharmony_ci */ 3238bf215546Sopenharmony_ci tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS); 3239bf215546Sopenharmony_ci 3240bf215546Sopenharmony_ci /* Restore RB_BIN_CONTROL/GRAS_BIN_CONTROL saved above. */ 3241bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); 3242bf215546Sopenharmony_ci tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_RB_BIN_CONTROL) | 3243bf215546Sopenharmony_ci CP_SCRATCH_TO_REG_0_SCRATCH(0) | 3244bf215546Sopenharmony_ci CP_SCRATCH_TO_REG_0_CNT(1 - 1)); 3245bf215546Sopenharmony_ci 3246bf215546Sopenharmony_ci tu_cs_emit_pkt7(cs, CP_SCRATCH_TO_REG, 1); 3247bf215546Sopenharmony_ci tu_cs_emit(cs, CP_SCRATCH_TO_REG_0_REG(REG_A6XX_GRAS_BIN_CONTROL) | 3248bf215546Sopenharmony_ci CP_SCRATCH_TO_REG_0_SCRATCH(0) | 3249bf215546Sopenharmony_ci CP_SCRATCH_TO_REG_0_CNT(1 - 1)); 3250bf215546Sopenharmony_ci} 3251bf215546Sopenharmony_ci 3252bf215546Sopenharmony_cistatic bool 3253bf215546Sopenharmony_citu_attachment_store_unaligned(struct tu_cmd_buffer *cmd, uint32_t a) 3254bf215546Sopenharmony_ci{ 3255bf215546Sopenharmony_ci struct tu_physical_device *phys_dev = cmd->device->physical_device; 3256bf215546Sopenharmony_ci const struct tu_image_view *iview = cmd->state.attachments[a]; 3257bf215546Sopenharmony_ci const VkRect2D *render_area = &cmd->state.render_area; 3258bf215546Sopenharmony_ci 3259bf215546Sopenharmony_ci /* Unaligned store is incredibly rare in CTS, we have to force it to test. */ 3260bf215546Sopenharmony_ci if (unlikely(cmd->device->physical_device->instance->debug_flags & TU_DEBUG_UNALIGNED_STORE)) 3261bf215546Sopenharmony_ci return true; 3262bf215546Sopenharmony_ci 3263bf215546Sopenharmony_ci uint32_t x1 = render_area->offset.x; 3264bf215546Sopenharmony_ci uint32_t y1 = render_area->offset.y; 3265bf215546Sopenharmony_ci uint32_t x2 = x1 + render_area->extent.width; 3266bf215546Sopenharmony_ci uint32_t y2 = y1 + render_area->extent.height; 3267bf215546Sopenharmony_ci /* x2/y2 can be unaligned if equal to the size of the image, since it will 3268bf215546Sopenharmony_ci * write into padding space. The one exception is linear levels which don't 3269bf215546Sopenharmony_ci * have the required y padding in the layout (except for the last level) 3270bf215546Sopenharmony_ci */ 3271bf215546Sopenharmony_ci bool need_y2_align = 3272bf215546Sopenharmony_ci y2 != iview->view.height || iview->view.need_y2_align; 3273bf215546Sopenharmony_ci 3274bf215546Sopenharmony_ci return (x1 % phys_dev->info->gmem_align_w || 3275bf215546Sopenharmony_ci (x2 % phys_dev->info->gmem_align_w && x2 != iview->view.width) || 3276bf215546Sopenharmony_ci y1 % phys_dev->info->gmem_align_h || 3277bf215546Sopenharmony_ci (y2 % phys_dev->info->gmem_align_h && need_y2_align)); 3278bf215546Sopenharmony_ci} 3279bf215546Sopenharmony_ci 3280bf215546Sopenharmony_ci/* Choose the GMEM layout (use the CCU space or not) based on whether the 3281bf215546Sopenharmony_ci * current attachments will need. This has to happen at vkBeginRenderPass() 3282bf215546Sopenharmony_ci * time because tu_attachment_store_unaligned() looks at the image views, which 3283bf215546Sopenharmony_ci * are only available at that point. This should match the logic for the 3284bf215546Sopenharmony_ci * !unaligned case in tu_store_gmem_attachment(). 3285bf215546Sopenharmony_ci */ 3286bf215546Sopenharmony_civoid 3287bf215546Sopenharmony_citu_choose_gmem_layout(struct tu_cmd_buffer *cmd) 3288bf215546Sopenharmony_ci{ 3289bf215546Sopenharmony_ci cmd->state.gmem_layout = TU_GMEM_LAYOUT_FULL; 3290bf215546Sopenharmony_ci 3291bf215546Sopenharmony_ci for (unsigned i = 0; i < cmd->state.pass->attachment_count; i++) { 3292bf215546Sopenharmony_ci if (!cmd->state.attachments[i]) 3293bf215546Sopenharmony_ci continue; 3294bf215546Sopenharmony_ci 3295bf215546Sopenharmony_ci struct tu_render_pass_attachment *att = 3296bf215546Sopenharmony_ci &cmd->state.pass->attachments[i]; 3297bf215546Sopenharmony_ci if ((att->store || att->store_stencil) && 3298bf215546Sopenharmony_ci tu_attachment_store_unaligned(cmd, i)) 3299bf215546Sopenharmony_ci cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU; 3300bf215546Sopenharmony_ci if (att->will_be_resolved && !blit_can_resolve(att->format)) 3301bf215546Sopenharmony_ci cmd->state.gmem_layout = TU_GMEM_LAYOUT_AVOID_CCU; 3302bf215546Sopenharmony_ci } 3303bf215546Sopenharmony_ci 3304bf215546Sopenharmony_ci cmd->state.tiling = &cmd->state.framebuffer->tiling[cmd->state.gmem_layout]; 3305bf215546Sopenharmony_ci} 3306bf215546Sopenharmony_ci 3307bf215546Sopenharmony_civoid 3308bf215546Sopenharmony_citu_store_gmem_attachment(struct tu_cmd_buffer *cmd, 3309bf215546Sopenharmony_ci struct tu_cs *cs, 3310bf215546Sopenharmony_ci uint32_t a, 3311bf215546Sopenharmony_ci uint32_t gmem_a, 3312bf215546Sopenharmony_ci bool cond_exec_allowed) 3313bf215546Sopenharmony_ci{ 3314bf215546Sopenharmony_ci const VkRect2D *render_area = &cmd->state.render_area; 3315bf215546Sopenharmony_ci struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a]; 3316bf215546Sopenharmony_ci const struct tu_image_view *iview = cmd->state.attachments[a]; 3317bf215546Sopenharmony_ci struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a]; 3318bf215546Sopenharmony_ci 3319bf215546Sopenharmony_ci if (!dst->store && !dst->store_stencil) 3320bf215546Sopenharmony_ci return; 3321bf215546Sopenharmony_ci 3322bf215546Sopenharmony_ci /* Unconditional store should happen only if attachment was cleared, 3323bf215546Sopenharmony_ci * which could have happened either by load_op or via vkCmdClearAttachments. 3324bf215546Sopenharmony_ci */ 3325bf215546Sopenharmony_ci bool cond_exec = cond_exec_allowed && src->cond_store_allowed; 3326bf215546Sopenharmony_ci if (cond_exec) { 3327bf215546Sopenharmony_ci tu_begin_load_store_cond_exec(cmd, cs, false); 3328bf215546Sopenharmony_ci } 3329bf215546Sopenharmony_ci 3330bf215546Sopenharmony_ci bool unaligned = tu_attachment_store_unaligned(cmd, a); 3331bf215546Sopenharmony_ci 3332bf215546Sopenharmony_ci /* D32_SFLOAT_S8_UINT is quite special format: it has two planes, 3333bf215546Sopenharmony_ci * one for depth and other for stencil. When resolving a MSAA 3334bf215546Sopenharmony_ci * D32_SFLOAT_S8_UINT to S8_UINT, we need to take that into account. 3335bf215546Sopenharmony_ci */ 3336bf215546Sopenharmony_ci bool resolve_d32s8_s8 = 3337bf215546Sopenharmony_ci src->format == VK_FORMAT_D32_SFLOAT_S8_UINT && 3338bf215546Sopenharmony_ci dst->format == VK_FORMAT_S8_UINT; 3339bf215546Sopenharmony_ci 3340bf215546Sopenharmony_ci /* The fast path doesn't support picking out the last component of a D24S8 3341bf215546Sopenharmony_ci * texture reinterpreted as RGBA8_UNORM. 3342bf215546Sopenharmony_ci */ 3343bf215546Sopenharmony_ci bool resolve_d24s8_s8 = 3344bf215546Sopenharmony_ci src->format == VK_FORMAT_D24_UNORM_S8_UINT && 3345bf215546Sopenharmony_ci dst->format == VK_FORMAT_S8_UINT; 3346bf215546Sopenharmony_ci 3347bf215546Sopenharmony_ci bool store_common = dst->store && !resolve_d32s8_s8; 3348bf215546Sopenharmony_ci bool store_separate_stencil = dst->store_stencil || resolve_d32s8_s8; 3349bf215546Sopenharmony_ci 3350bf215546Sopenharmony_ci trace_start_gmem_store(&cmd->trace, cs); 3351bf215546Sopenharmony_ci 3352bf215546Sopenharmony_ci /* use fast path when render area is aligned, except for unsupported resolve cases */ 3353bf215546Sopenharmony_ci if (!unaligned && !resolve_d24s8_s8 && 3354bf215546Sopenharmony_ci (a == gmem_a || blit_can_resolve(dst->format))) { 3355bf215546Sopenharmony_ci if (store_common) 3356bf215546Sopenharmony_ci tu_emit_blit(cmd, cs, iview, src, true, false); 3357bf215546Sopenharmony_ci if (store_separate_stencil) 3358bf215546Sopenharmony_ci tu_emit_blit(cmd, cs, iview, src, true, true); 3359bf215546Sopenharmony_ci 3360bf215546Sopenharmony_ci if (cond_exec) { 3361bf215546Sopenharmony_ci tu_end_load_store_cond_exec(cmd, cs, false); 3362bf215546Sopenharmony_ci } 3363bf215546Sopenharmony_ci 3364bf215546Sopenharmony_ci trace_end_gmem_store(&cmd->trace, cs, dst->format, true, false); 3365bf215546Sopenharmony_ci return; 3366bf215546Sopenharmony_ci } 3367bf215546Sopenharmony_ci 3368bf215546Sopenharmony_ci assert(cmd->state.gmem_layout == TU_GMEM_LAYOUT_AVOID_CCU); 3369bf215546Sopenharmony_ci 3370bf215546Sopenharmony_ci enum pipe_format src_format = tu_vk_format_to_pipe_format(src->format); 3371bf215546Sopenharmony_ci if (src_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) 3372bf215546Sopenharmony_ci src_format = PIPE_FORMAT_Z32_FLOAT; 3373bf215546Sopenharmony_ci 3374bf215546Sopenharmony_ci enum pipe_format dst_format = tu_vk_format_to_pipe_format(dst->format); 3375bf215546Sopenharmony_ci if (dst_format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT) 3376bf215546Sopenharmony_ci dst_format = PIPE_FORMAT_Z32_FLOAT; 3377bf215546Sopenharmony_ci 3378bf215546Sopenharmony_ci if (dst->samples > 1) { 3379bf215546Sopenharmony_ci /* If we hit this path, we have to disable draw states after every tile 3380bf215546Sopenharmony_ci * instead of once at the end of the renderpass, so that they aren't 3381bf215546Sopenharmony_ci * executed when calling CP_DRAW. 3382bf215546Sopenharmony_ci * 3383bf215546Sopenharmony_ci * TODO: store a flag somewhere so we don't do this more than once and 3384bf215546Sopenharmony_ci * don't do it after the renderpass when this happens. 3385bf215546Sopenharmony_ci */ 3386bf215546Sopenharmony_ci if (store_common || store_separate_stencil) 3387bf215546Sopenharmony_ci tu_disable_draw_states(cmd, cs); 3388bf215546Sopenharmony_ci 3389bf215546Sopenharmony_ci if (store_common) { 3390bf215546Sopenharmony_ci store_3d_blit(cmd, cs, iview, dst->samples, false, src_format, 3391bf215546Sopenharmony_ci dst_format, render_area, tu_attachment_gmem_offset(cmd, src), src->cpp); 3392bf215546Sopenharmony_ci } 3393bf215546Sopenharmony_ci if (store_separate_stencil) { 3394bf215546Sopenharmony_ci store_3d_blit(cmd, cs, iview, dst->samples, true, PIPE_FORMAT_S8_UINT, 3395bf215546Sopenharmony_ci PIPE_FORMAT_S8_UINT, render_area, 3396bf215546Sopenharmony_ci tu_attachment_gmem_offset_stencil(cmd, src), src->samples); 3397bf215546Sopenharmony_ci } 3398bf215546Sopenharmony_ci } else { 3399bf215546Sopenharmony_ci r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent); 3400bf215546Sopenharmony_ci 3401bf215546Sopenharmony_ci if (store_common) { 3402bf215546Sopenharmony_ci store_cp_blit(cmd, cs, iview, src->samples, false, src_format, 3403bf215546Sopenharmony_ci dst_format, tu_attachment_gmem_offset(cmd, src), src->cpp); 3404bf215546Sopenharmony_ci } 3405bf215546Sopenharmony_ci if (store_separate_stencil) { 3406bf215546Sopenharmony_ci store_cp_blit(cmd, cs, iview, src->samples, true, PIPE_FORMAT_S8_UINT, 3407bf215546Sopenharmony_ci PIPE_FORMAT_S8_UINT, tu_attachment_gmem_offset_stencil(cmd, src), src->samples); 3408bf215546Sopenharmony_ci } 3409bf215546Sopenharmony_ci } 3410bf215546Sopenharmony_ci 3411bf215546Sopenharmony_ci if (cond_exec) { 3412bf215546Sopenharmony_ci tu_end_load_store_cond_exec(cmd, cs, false); 3413bf215546Sopenharmony_ci } 3414bf215546Sopenharmony_ci 3415bf215546Sopenharmony_ci trace_end_gmem_store(&cmd->trace, cs, dst->format, false, unaligned); 3416bf215546Sopenharmony_ci} 3417