/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 * Copyright © 2018 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include <stdio.h>

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"
#include "freedreno_tracepoints.h"

#include "fd6_blitter.h"
#include "fd6_context.h"
#include "fd6_draw.h"
#include "fd6_emit.h"
#include "fd6_gmem.h"
#include "fd6_pack.h"
#include "fd6_program.h"
#include "fd6_resource.h"
#include "fd6_zsa.h"

/**
 * Emits the flags registers, suitable for RB_MRT_FLAG_BUFFER,
 * RB_DEPTH_FLAG_BUFFER, SP_PS_2D_SRC_FLAGS, and RB_BLIT_FLAG_DST.
 */
void
fd6_emit_flag_reference(struct fd_ringbuffer *ring, struct fd_resource *rsc,
                        int level, int layer)
{
   if (fd_resource_ubwc_enabled(rsc, level)) {
      OUT_RELOC(ring, rsc->bo, fd_resource_ubwc_offset(rsc, level, layer), 0,
                0);
      OUT_RING(ring, A6XX_RB_MRT_FLAG_BUFFER_PITCH_PITCH(
                        fdl_ubwc_pitch(&rsc->layout, level)) |
                        A6XX_RB_MRT_FLAG_BUFFER_PITCH_ARRAY_PITCH(
                           rsc->layout.ubwc_layer_size >> 2));
   } else {
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, 0x00000000);
   }
}

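/*
 * Emit per-MRT color buffer state: format/tiling/swap, pitch, base address
 * (or GMEM base when rendering binned), UBWC flag buffer, sRGB enables, and
 * the max layer index used to bound layered rendering.
 */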
83 */ 84 unsigned max_layer_index = 0; 85 enum a6xx_format mrt0_format = 0; 86 87 for (i = 0; i < pfb->nr_cbufs; i++) { 88 enum a3xx_color_swap swap = WZYX; 89 bool sint = false, uint = false; 90 struct fd_resource *rsc = NULL; 91 struct fdl_slice *slice = NULL; 92 uint32_t stride = 0; 93 uint32_t array_stride = 0; 94 uint32_t offset; 95 96 if (!pfb->cbufs[i]) 97 continue; 98 99 struct pipe_surface *psurf = pfb->cbufs[i]; 100 enum pipe_format pformat = psurf->format; 101 rsc = fd_resource(psurf->texture); 102 if (!rsc->bo) 103 continue; 104 105 uint32_t base = gmem ? gmem->cbuf_base[i] : 0; 106 slice = fd_resource_slice(rsc, psurf->u.tex.level); 107 uint32_t tile_mode = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level); 108 enum a6xx_format format = fd6_color_format(pformat, tile_mode); 109 sint = util_format_is_pure_sint(pformat); 110 uint = util_format_is_pure_uint(pformat); 111 112 if (util_format_is_srgb(pformat)) 113 srgb_cntl |= (1 << i); 114 115 offset = 116 fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer); 117 118 stride = fd_resource_pitch(rsc, psurf->u.tex.level); 119 array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level); 120 swap = fd6_color_swap(pformat, rsc->layout.tile_mode); 121 122 max_layer_index = psurf->u.tex.last_layer - psurf->u.tex.first_layer; 123 124 assert((offset + slice->size0) <= fd_bo_size(rsc->bo)); 125 126 OUT_REG( 127 ring, 128 A6XX_RB_MRT_BUF_INFO(i, .color_format = format, 129 .color_tile_mode = tile_mode, .color_swap = swap), 130 A6XX_RB_MRT_PITCH(i, .a6xx_rb_mrt_pitch = stride), 131 A6XX_RB_MRT_ARRAY_PITCH(i, .a6xx_rb_mrt_array_pitch = array_stride), 132 A6XX_RB_MRT_BASE(i, .bo = rsc->bo, .bo_offset = offset), 133 A6XX_RB_MRT_BASE_GMEM(i, .unknown = base)); 134 135 OUT_REG(ring, A6XX_SP_FS_MRT_REG(i, .color_format = format, 136 .color_sint = sint, .color_uint = uint)); 137 138 OUT_PKT4(ring, REG_A6XX_RB_MRT_FLAG_BUFFER(i), 3); 139 fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level, 140 psurf->u.tex.first_layer); 141 142 if (i == 0) 143 mrt0_format = format; 144 } 145 if (pfb->zsbuf) 146 max_layer_index = pfb->zsbuf->u.tex.last_layer - pfb->zsbuf->u.tex.first_layer; 147 148 OUT_REG(ring, A6XX_GRAS_LRZ_MRT_BUF_INFO_0(.color_format = mrt0_format)); 149 150 OUT_REG(ring, A6XX_RB_SRGB_CNTL(.dword = srgb_cntl)); 151 OUT_REG(ring, A6XX_SP_SRGB_CNTL(.dword = srgb_cntl)); 152 153 OUT_REG(ring, A6XX_GRAS_MAX_LAYER_INDEX(max_layer_index)); 154} 155 156static void 157emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf, 158 const struct fd_gmem_stateobj *gmem) 159{ 160 if (zsbuf) { 161 struct fd_resource *rsc = fd_resource(zsbuf->texture); 162 enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format); 163 uint32_t stride = fd_resource_pitch(rsc, zsbuf->u.tex.level); 164 uint32_t array_stride = fd_resource_layer_stride(rsc, zsbuf->u.tex.level); 165 uint32_t base = gmem ? 
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a6xx_depth_format fmt = fd6_pipe2depth(zsbuf->format);
      uint32_t stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
      uint32_t array_stride = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
      uint32_t base = gmem ? gmem->zsbuf_base[0] : 0;
      uint32_t offset =
         fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

      OUT_REG(
         ring, A6XX_RB_DEPTH_BUFFER_INFO(.depth_format = fmt),
         A6XX_RB_DEPTH_BUFFER_PITCH(.a6xx_rb_depth_buffer_pitch = stride),
         A6XX_RB_DEPTH_BUFFER_ARRAY_PITCH(.a6xx_rb_depth_buffer_array_pitch =
                                             array_stride),
         A6XX_RB_DEPTH_BUFFER_BASE(.bo = rsc->bo, .bo_offset = offset),
         A6XX_RB_DEPTH_BUFFER_BASE_GMEM(.dword = base));

      OUT_REG(ring, A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = fmt));

      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_FLAG_BUFFER_BASE, 3);
      fd6_emit_flag_reference(ring, rsc, zsbuf->u.tex.level,
                              zsbuf->u.tex.first_layer);

      if (rsc->lrz) {
         OUT_REG(ring, A6XX_GRAS_LRZ_BUFFER_BASE(.bo = rsc->lrz),
                 A6XX_GRAS_LRZ_BUFFER_PITCH(.pitch = rsc->lrz_pitch),
                 // XXX a6xx seems to use a different buffer here.. not sure
                 // what for..
                 A6XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE());
      } else {
         OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);
      }

      /* NOTE: blob emits GRAS_LRZ_CNTL plus GRAS_LRZ_BUFFER_BASE
       * plus this CP_EVENT_WRITE at the end in its own IB..
       */
      OUT_PKT7(ring, CP_EVENT_WRITE, 1);
      OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(LRZ_CLEAR));

      if (rsc->stencil) {
         stride = fd_resource_pitch(rsc->stencil, zsbuf->u.tex.level);
         array_stride = fd_resource_layer_stride(rsc->stencil, zsbuf->u.tex.level);
         uint32_t base = gmem ? gmem->zsbuf_base[1] : 0;
         uint32_t offset =
            fd_resource_offset(rsc->stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer);

         OUT_REG(ring, A6XX_RB_STENCIL_INFO(.separate_stencil = true),
                 A6XX_RB_STENCIL_BUFFER_PITCH(.a6xx_rb_stencil_buffer_pitch =
                                                 stride),
                 A6XX_RB_STENCIL_BUFFER_ARRAY_PITCH(
                    .a6xx_rb_stencil_buffer_array_pitch = array_stride),
                 A6XX_RB_STENCIL_BUFFER_BASE(.bo = rsc->stencil->bo, .bo_offset = offset),
                 A6XX_RB_STENCIL_BUFFER_BASE_GMEM(.dword = base));
      } else {
         OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
      }
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_DEPTH_BUFFER_INFO, 6);
      OUT_RING(ring, A6XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH6_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_GMEM */

      OUT_REG(ring,
              A6XX_GRAS_SU_DEPTH_BUFFER_INFO(.depth_format = DEPTH6_NONE));

      OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_BUFFER_BASE, 5);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI */

      OUT_REG(ring, A6XX_RB_STENCIL_INFO(0));
   }
}

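/*
 * Decide whether to use hw binning for this batch.  Binning isn't worthwhile
 * without multiple bins and at least one draw, and (I think) the per-pipe
 * visibility test uses one bit per bin in a 32-bit VSC_STATE register, hence
 * the 32 bins-per-pipe limit.
 */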
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) >= 2) &&
          (batch->num_draws > 0);
}

static void
patch_fb_read_gmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct fd_screen *screen = batch->ctx->screen;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   struct pipe_resource *prsc = psurf->texture;
   struct fd_resource *rsc = fd_resource(prsc);
   enum pipe_format format = psurf->format;

   uint8_t swiz[4];
   fdl6_format_swiz(psurf->format, false, swiz);

   /* always TILE6_2 mode in GMEM, which also means no swap: */
   uint32_t texconst0 = A6XX_TEX_CONST_0_FMT(fd6_texture_format(format, rsc->layout.tile_mode)) |
                        A6XX_TEX_CONST_0_SAMPLES(fd_msaa_samples(prsc->nr_samples)) |
                        A6XX_TEX_CONST_0_SWAP(WZYX) |
                        A6XX_TEX_CONST_0_TILE_MODE(TILE6_2) |
                        COND(util_format_is_srgb(format), A6XX_TEX_CONST_0_SRGB) |
                        A6XX_TEX_CONST_0_SWIZ_X(fdl6_swiz(swiz[0])) |
                        A6XX_TEX_CONST_0_SWIZ_Y(fdl6_swiz(swiz[1])) |
                        A6XX_TEX_CONST_0_SWIZ_Z(fdl6_swiz(swiz[2])) |
                        A6XX_TEX_CONST_0_SWIZ_W(fdl6_swiz(swiz[3]));

   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);
      patch->cs[0] = texconst0;
      patch->cs[2] = A6XX_TEX_CONST_2_PITCH(gmem->bin_w * gmem->cbuf_cpp[0]) |
                     A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
      patch->cs[4] = A6XX_TEX_CONST_4_BASE_LO(screen->gmem_base);
      patch->cs[5] = A6XX_TEX_CONST_5_BASE_HI(screen->gmem_base >> 32) |
                     A6XX_TEX_CONST_5_DEPTH(1);
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

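/*
 * Sysmem counterpart of patch_fb_read_gmem: framebuffer reads sample the
 * render target in memory directly, so the patched descriptor is just a
 * normal texture view of cbufs[0].
 */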
static void
patch_fb_read_sysmem(struct fd_batch *batch)
{
   unsigned num_patches = fd_patch_num_elements(&batch->fb_read_patches);
   if (!num_patches)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];
   if (!psurf)
      return;

   struct fd_resource *rsc = fd_resource(psurf->texture);

   uint32_t block_width, block_height;
   fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);

   struct fdl_view_args args = {
      .iova = fd_bo_get_iova(rsc->bo),

      .base_miplevel = psurf->u.tex.level,
      .level_count = 1,

      .base_array_layer = psurf->u.tex.first_layer,
      .layer_count = 1,

      .format = psurf->format,
      .swiz = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W},

      .type = FDL_VIEW_TYPE_2D,
      .chroma_offsets = {FDL_CHROMA_LOCATION_COSITED_EVEN,
                         FDL_CHROMA_LOCATION_COSITED_EVEN},
   };
   const struct fdl_layout *layouts[3] = {&rsc->layout, NULL, NULL};
   struct fdl6_view view;
   fdl6_view_init(&view, layouts, &args,
                  batch->ctx->screen->info->a6xx.has_z24uint_s8uint);

   for (unsigned i = 0; i < num_patches; i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->fb_read_patches, i);

      /* This is cheating a bit, since we can't use OUT_RELOC() here.. but
       * the render target will already have a reloc emitted for RB_MRT state,
       * so we can get away with manually patching in the address here:
       */
      memcpy(patch->cs, view.descriptor, FDL6_TEX_CONST_DWORDS * 4);
   }
   util_dynarray_clear(&batch->fb_read_patches);
}

static void
update_render_cntl(struct fd_batch *batch, struct pipe_framebuffer_state *pfb,
                   bool binning)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;
   uint32_t cntl = 0;
   bool depth_ubwc_enable = false;
   uint32_t mrts_ubwc_enable = 0;
   int i;

   if (pfb->zsbuf) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
      depth_ubwc_enable =
         fd_resource_ubwc_enabled(rsc, pfb->zsbuf->u.tex.level);
   }

   for (i = 0; i < pfb->nr_cbufs; i++) {
      if (!pfb->cbufs[i])
         continue;

      struct pipe_surface *psurf = pfb->cbufs[i];
      struct fd_resource *rsc = fd_resource(psurf->texture);
      if (!rsc->bo)
         continue;

      if (fd_resource_ubwc_enabled(rsc, psurf->u.tex.level))
         mrts_ubwc_enable |= 1 << i;
   }

   cntl |= A6XX_RB_RENDER_CNTL_CCUSINGLECACHELINESIZE(2);
   if (binning)
      cntl |= A6XX_RB_RENDER_CNTL_BINNING;

   if (screen->info->a6xx.has_cp_reg_write) {
      OUT_PKT7(ring, CP_REG_WRITE, 3);
      OUT_RING(ring, CP_REG_WRITE_0_TRACKER(TRACK_RENDER_CNTL));
      OUT_RING(ring, REG_A6XX_RB_RENDER_CNTL);
   } else {
      OUT_PKT4(ring, REG_A6XX_RB_RENDER_CNTL, 1);
   }
   OUT_RING(ring, cntl |
                     COND(depth_ubwc_enable, A6XX_RB_RENDER_CNTL_FLAG_DEPTH) |
                     A6XX_RB_RENDER_CNTL_FLAG_MRTS(mrts_ubwc_enable));
}

/* extra size to store VSC_DRAW_STRM_SIZE: */
#define VSC_DRAW_STRM_SIZE(pitch) ((pitch)*32 + 0x100)
#define VSC_PRIM_STRM_SIZE(pitch) ((pitch)*32)

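/*
 * Allocate (or grow, if a previous batch needed more space) the VSC draw and
 * primitive stream buffers, and program the bin/pipe layout.  Each of the 32
 * pipes gets one "pitch" worth of stream; the streams are written during the
 * binning pass and consumed per-tile in the draw pass.
 */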
static void
update_vsc_pipe(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   if (batch->draw_strm_bits / 8 > fd6_ctx->vsc_draw_strm_pitch) {
      if (fd6_ctx->vsc_draw_strm)
         fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      /* Note: probably only need to align to 0x40, but aligning stronger
       * reduces the odds that we will have to realloc again on the next
       * frame:
       */
      fd6_ctx->vsc_draw_strm_pitch = align(batch->draw_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);
   }

   if (batch->prim_strm_bits / 8 > fd6_ctx->vsc_prim_strm_pitch) {
      if (fd6_ctx->vsc_prim_strm)
         fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch = align(batch->prim_strm_bits / 8, 0x4000);
      mesa_logd("pre-resize VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);
   }

   if (!fd6_ctx->vsc_draw_strm) {
      fd6_ctx->vsc_draw_strm = fd_bo_new(
         ctx->screen->dev, VSC_DRAW_STRM_SIZE(fd6_ctx->vsc_draw_strm_pitch),
         FD_BO_NOMAP, "vsc_draw_strm");
   }

   if (!fd6_ctx->vsc_prim_strm) {
      fd6_ctx->vsc_prim_strm = fd_bo_new(
         ctx->screen->dev, VSC_PRIM_STRM_SIZE(fd6_ctx->vsc_prim_strm_pitch),
         FD_BO_NOMAP, "vsc_prim_strm");
   }

   OUT_REG(
      ring, A6XX_VSC_BIN_SIZE(.width = gmem->bin_w, .height = gmem->bin_h),
      A6XX_VSC_DRAW_STRM_SIZE_ADDRESS(.bo = fd6_ctx->vsc_draw_strm,
                                      .bo_offset =
                                         32 * fd6_ctx->vsc_draw_strm_pitch));

   OUT_REG(ring, A6XX_VSC_BIN_COUNT(.nx = gmem->nbins_x, .ny = gmem->nbins_y));

   OUT_PKT4(ring, REG_A6XX_VSC_PIPE_CONFIG_REG(0), 32);
   for (i = 0; i < 32; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A6XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A6XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A6XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A6XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_REG(
      ring, A6XX_VSC_PRIM_STRM_ADDRESS(.bo = fd6_ctx->vsc_prim_strm),
      A6XX_VSC_PRIM_STRM_PITCH(.dword = fd6_ctx->vsc_prim_strm_pitch),
      A6XX_VSC_PRIM_STRM_LIMIT(.dword = fd6_ctx->vsc_prim_strm_pitch - 64));

   OUT_REG(
      ring, A6XX_VSC_DRAW_STRM_ADDRESS(.bo = fd6_ctx->vsc_draw_strm),
      A6XX_VSC_DRAW_STRM_PITCH(.dword = fd6_ctx->vsc_draw_strm_pitch),
      A6XX_VSC_DRAW_STRM_LIMIT(.dword = fd6_ctx->vsc_draw_strm_pitch - 64));
}

/*
 * If overflow is detected, either 0x1 (VSC_DRAW_STRM overflow) or 0x3
 * (VSC_PRIM_STRM overflow) plus the size of the overflowed buffer is
 * written to control->vsc_overflow.  This allows the CPU to detect which
 * buffer overflowed (and, since the current size is encoded as well, it
 * prevents already-submitted but not yet executed batches from fooling
 * the CPU into increasing the size again unnecessarily).
 */
static void
emit_vsc_overflow_test(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(batch->ctx);

   assert((fd6_ctx->vsc_draw_strm_pitch & 0x3) == 0);
   assert((fd6_ctx->vsc_prim_strm_pitch & 0x3) == 0);

   /* Check for overflow, write vsc_scratch if detected: */
   for (int i = 0; i < gmem->num_vsc_pipes; i++) {
      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_DRAW_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_draw_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(1 + fd6_ctx->vsc_draw_strm_pitch));

      OUT_PKT7(ring, CP_COND_WRITE5, 8);
      OUT_RING(ring, CP_COND_WRITE5_0_FUNCTION(WRITE_GE) |
                        CP_COND_WRITE5_0_WRITE_MEMORY);
      OUT_RING(ring, CP_COND_WRITE5_1_POLL_ADDR_LO(
                        REG_A6XX_VSC_PRIM_STRM_SIZE_REG(i)));
      OUT_RING(ring, CP_COND_WRITE5_2_POLL_ADDR_HI(0));
      OUT_RING(ring, CP_COND_WRITE5_3_REF(fd6_ctx->vsc_prim_strm_pitch - 64));
      OUT_RING(ring, CP_COND_WRITE5_4_MASK(~0));
      OUT_RELOC(ring,
                control_ptr(fd6_ctx, vsc_overflow)); /* WRITE_ADDR_LO/HI */
      OUT_RING(ring,
               CP_COND_WRITE5_7_WRITE_DATA(3 + fd6_ctx->vsc_prim_strm_pitch));
   }

   OUT_PKT7(ring, CP_WAIT_MEM_WRITES, 0);
}

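/*
 * CPU-side half of the overflow handling: read back vsc_overflow (written
 * by the CP_COND_WRITE5s above) and, if a stream overflowed, free the BO
 * and double the pitch so the next allocation is big enough.
 */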
static void
check_vsc_overflow(struct fd_context *ctx)
{
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd6_control *control = fd_bo_map(fd6_ctx->control_mem);
   uint32_t vsc_overflow = control->vsc_overflow;

   if (!vsc_overflow)
      return;

   /* clear overflow flag: */
   control->vsc_overflow = 0;

   unsigned buffer = vsc_overflow & 0x3;
   unsigned size = vsc_overflow & ~0x3;

   if (buffer == 0x1) {
      /* VSC_DRAW_STRM overflow: */

      if (size < fd6_ctx->vsc_draw_strm_pitch) {
         /* we've already increased the size, this overflow is
          * from a batch submitted before resize, but executed
          * after
          */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_draw_strm);
      fd6_ctx->vsc_draw_strm = NULL;
      fd6_ctx->vsc_draw_strm_pitch *= 2;

      mesa_logd("resized VSC_DRAW_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_draw_strm_pitch);

   } else if (buffer == 0x3) {
      /* VSC_PRIM_STRM overflow: */

      if (size < fd6_ctx->vsc_prim_strm_pitch) {
         /* we've already increased the size */
         return;
      }

      fd_bo_del(fd6_ctx->vsc_prim_strm);
      fd6_ctx->vsc_prim_strm = NULL;
      fd6_ctx->vsc_prim_strm_pitch *= 2;

      mesa_logd("resized VSC_PRIM_STRM_PITCH to: 0x%x",
                fd6_ctx->vsc_prim_strm_pitch);

   } else {
      /* NOTE: it's possible, for example, for overflow to corrupt the
       * control page.  I mostly just see this hit if I set initial VSC
       * buffer size extremely small.  Things still seem to recover,
       * but maybe we should pre-emptively realloc vsc_data/vsc_data2
       * and hope for different memory placement?
       */
      mesa_loge("invalid vsc_overflow value: 0x%08x", vsc_overflow);
   }
}

static void
emit_common_init(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_start));

   fd6_event_write(batch, ring, ZPASS_DONE, false);
}

static void
emit_common_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_autotune *at = &batch->ctx->autotune;
   struct fd_batch_result *result = batch->autotune_result;

   if (!result)
      return;

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_CONTROL, 1);
   OUT_RING(ring, A6XX_RB_SAMPLE_COUNT_CONTROL_COPY);

   OUT_PKT4(ring, REG_A6XX_RB_SAMPLE_COUNT_ADDR, 2);
   OUT_RELOC(ring, results_ptr(at, result[result->idx].samples_end));

   fd6_event_write(batch, ring, ZPASS_DONE, false);

   // TODO is there a better event to use.. a single ZPASS_DONE_TS would be nice
   OUT_PKT7(ring, CP_EVENT_WRITE, 4);
   OUT_RING(ring, CP_EVENT_WRITE_0_EVENT(CACHE_FLUSH_TS));
   OUT_RELOC(ring, results_ptr(at, fence));
   OUT_RING(ring, result->fence);
}

/*
 * Emit conditional CP_INDIRECT_BRANCH based on VSC_STATE[p], ie. the IB
 * is skipped for tiles that have no visible geometry.
 */
static void
emit_conditional_ib(struct fd_batch *batch, const struct fd_tile *tile,
                    struct fd_ringbuffer *target)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (target->cur == target->start)
      return;

   emit_marker6(ring, 6);

   unsigned count = fd_ringbuffer_cmd_count(target);

   BEGIN_RING(ring, 5 + 4 * count); /* ensure conditional doesn't get split */

   OUT_PKT7(ring, CP_REG_TEST, 1);
   OUT_RING(ring, A6XX_CP_REG_TEST_0_REG(REG_A6XX_VSC_STATE_REG(tile->p)) |
                     A6XX_CP_REG_TEST_0_BIT(tile->n) |
                     A6XX_CP_REG_TEST_0_WAIT_FOR_ME);

   OUT_PKT7(ring, CP_COND_REG_EXEC, 2);
   OUT_RING(ring, CP_COND_REG_EXEC_0_MODE(PRED_TEST));
   OUT_RING(ring, CP_COND_REG_EXEC_1_DWORDS(4 * count));

   for (unsigned i = 0; i < count; i++) {
      uint32_t dwords;
      OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
      dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
      assert(dwords > 0);
      OUT_RING(ring, dwords);
   }

   emit_marker6(ring, 6);
}

static void
set_scissor(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1, uint32_t x2,
            uint32_t y2)
{
   OUT_REG(ring, A6XX_GRAS_SC_WINDOW_SCISSOR_TL(.x = x1, .y = y1),
           A6XX_GRAS_SC_WINDOW_SCISSOR_BR(.x = x2, .y = y2));

   OUT_REG(ring, A6XX_GRAS_2D_RESOLVE_CNTL_1(.x = x1, .y = y1),
           A6XX_GRAS_2D_RESOLVE_CNTL_2(.x = x2, .y = y2));
}

static void
set_bin_size(struct fd_ringbuffer *ring, uint32_t w, uint32_t h, uint32_t flag)
{
   OUT_REG(ring, A6XX_GRAS_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   OUT_REG(ring, A6XX_RB_BIN_CONTROL(.binw = w, .binh = h, .dword = flag));
   /* no flag for RB_BIN_CONTROL2... */
   OUT_REG(ring, A6XX_RB_BIN_CONTROL2(.binw = w, .binh = h));
}

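/*
 * Emit the binning pass: rasterize the whole batch once in BINNING mode to
 * fill the VSC visibility streams, which the per-tile draw pass then uses
 * to skip geometry that isn't visible in a given tile.
 */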
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   assert(!batch->tessellation);

   set_scissor(ring, 0, 0, gmem->width - 1, gmem->height - 1);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BINNING));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x1);

   OUT_WFI5(ring);

   OUT_REG(ring, A6XX_VFD_MODE_CNTL(.render_mode = BINNING_PASS));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2C);

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(0) | A6XX_RB_WINDOW_OFFSET_Y(0));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(0) | A6XX_SP_TP_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   trace_start_binning_ib(&batch->trace, ring);
   fd6_emit_ib(ring, batch->draw);
   trace_end_binning_ib(&batch->trace, ring);

   fd_reset_wfi(batch);

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, UNK_2D);

   fd6_cache_inv(batch, ring);
   fd6_cache_flush(batch, ring);
   fd_wfi(batch, ring);

   OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

   trace_start_vsc_overflow_test(&batch->trace, batch->gmem);
   emit_vsc_overflow_test(batch);
   trace_end_vsc_overflow_test(&batch->trace, batch->gmem);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT7(ring, CP_SET_MODE, 1);
   OUT_RING(ring, 0x0);

   OUT_WFI5(ring);

   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .concurrent_resolve = screen->info->a6xx.concurrent_resolve));
}

static void
emit_msaa(struct fd_ringbuffer *ring, unsigned nr)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr);

   OUT_PKT4(ring, REG_A6XX_SP_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_SP_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_SP_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_SP_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_GRAS_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_GRAS_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A6XX_GRAS_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A6XX_GRAS_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A6XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A6XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A6XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A6XX_RB_MSAA_CNTL, 1);
   OUT_RING(ring, A6XX_RB_MSAA_CNTL_SAMPLES(samples));
}

static void prepare_tile_setup_ib(struct fd_batch *batch);
static void prepare_tile_fini_ib(struct fd_batch *batch);

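/*
 * Top of the GMEM (tiled) rendering path: restore invariant state, run any
 * prologue IB, build the per-tile setup/teardown IBs, emit framebuffer
 * state, and (if enabled) run the binning pass before the tile loop.
 */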
/* before first tile */
static void
fd6_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);

   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      trace_start_prologue(&batch->trace, ring);
      fd6_emit_ib(ring, batch->prologue);
      trace_end_prologue(&batch->trace, ring);
   }

   fd6_cache_inv(batch, ring);

   prepare_tile_setup_ib(batch);
   prepare_tile_fini_ib(batch);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd_wfi(batch, ring);
   OUT_REG(ring,
           A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_gmem,
                            .gmem = true,
                            .concurrent_resolve = screen->info->a6xx.concurrent_resolve));

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb, batch->gmem_state);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_gmem(batch);

   if (use_hw_binning(batch)) {
      /* enable stream-out during binning pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_RENDER_MODE(BINNING_PASS) |
                      A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));
      update_render_cntl(batch, pfb, true);
      emit_binning_pass(batch);

      /* and disable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(true));

      /*
       * NOTE: even if we detect VSC overflow and disable use of
       * visibility stream in draw pass, it is still safe to execute
       * the rest of these cmds:
       */

      // NOTE a618 not setting .FORCE_LRZ_WRITE_DIS ..
      set_bin_size(ring, gmem->bin_w, gmem->bin_h,
                   A6XX_RB_BIN_CONTROL_FORCE_LRZ_WRITE_DIS |
                      A6XX_RB_BIN_CONTROL_LRZ_FEEDBACK_ZMODE_MASK(0x6));

      OUT_PKT4(ring, REG_A6XX_VFD_MODE_CNTL, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT4(ring, REG_A6XX_PC_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT4(ring, REG_A6XX_VFD_POWER_CNTL, 1);
      OUT_RING(ring, screen->info->a6xx.magic.PC_POWER_CNTL);

      OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
      OUT_RING(ring, 0x1);
   } else {
      /* no binning pass, so enable stream-out for draw pass: */
      OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);
   }

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

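/*
 * Set the window offset, translating screen-space coords to the current
 * tile's origin.  The RB/SP/SP_TP copies presumably all need to match.
 */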
static void
set_window_offset(struct fd_ringbuffer *ring, uint32_t x1, uint32_t y1)
{
   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET_X(x1) | A6XX_RB_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_RB_WINDOW_OFFSET2, 1);
   OUT_RING(ring, A6XX_RB_WINDOW_OFFSET2_X(x1) | A6XX_RB_WINDOW_OFFSET2_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_WINDOW_OFFSET, 1);
   OUT_RING(ring, A6XX_SP_WINDOW_OFFSET_X(x1) | A6XX_SP_WINDOW_OFFSET_Y(y1));

   OUT_PKT4(ring, REG_A6XX_SP_TP_WINDOW_OFFSET, 1);
   OUT_RING(ring,
            A6XX_SP_TP_WINDOW_OFFSET_X(x1) | A6XX_SP_TP_WINDOW_OFFSET_Y(y1));
}

/* before mem2gmem */
static void
fd6_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd6_context *fd6_ctx = fd6_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_GMEM));
   emit_marker6(ring, 7);

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   set_scissor(ring, x1, y1, x2, y2);

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 7);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, fd6_ctx->vsc_draw_strm, /* per-pipe draw-stream address */
                (tile->p * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring,
                fd6_ctx->vsc_draw_strm, /* VSC_DRAW_STRM_ADDRESS + (p * 4) */
                (tile->p * 4) + (32 * fd6_ctx->vsc_draw_strm_pitch), 0, 0);
      OUT_RELOC(ring, fd6_ctx->vsc_prim_strm,
                (tile->p * fd6_ctx->vsc_prim_strm_pitch), 0, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      set_window_offset(ring, x1, y1);

      const struct fd_gmem_stateobj *gmem = batch->gmem_state;
      set_bin_size(ring, gmem->bin_w, gmem->bin_h, 0x6000000);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   } else {
      set_window_offset(ring, x1, y1);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);

      OUT_PKT7(ring, CP_SET_MODE, 1);
      OUT_RING(ring, 0x0);
   }
}

static void
set_blit_scissor(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_scissor_state blit_scissor = batch->max_scissor;

   blit_scissor.minx = ROUND_DOWN_TO(blit_scissor.minx, 16);
   blit_scissor.miny = ROUND_DOWN_TO(blit_scissor.miny, 4);
   blit_scissor.maxx = ALIGN(blit_scissor.maxx, 16);
   blit_scissor.maxy = ALIGN(blit_scissor.maxy, 4);

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_TL_X(blit_scissor.minx) |
                     A6XX_RB_BLIT_SCISSOR_TL_Y(blit_scissor.miny));
   OUT_RING(ring, A6XX_RB_BLIT_SCISSOR_BR_X(blit_scissor.maxx - 1) |
                     A6XX_RB_BLIT_SCISSOR_BR_Y(blit_scissor.maxy - 1));
}

static void
emit_blit(struct fd_batch *batch, struct fd_ringbuffer *ring, uint32_t base,
          struct pipe_surface *psurf, bool stencil)
{
   struct fd_resource *rsc = fd_resource(psurf->texture);
   enum pipe_format pfmt = psurf->format;
   uint32_t offset;
   bool ubwc_enabled;

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   /* separate stencil case: */
   if (stencil) {
      rsc = rsc->stencil;
      pfmt = rsc->b.b.format;
   }

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   ubwc_enabled = fd_resource_ubwc_enabled(rsc, psurf->u.tex.level);

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   uint32_t tile_mode = fd_resource_tile_mode(&rsc->b.b, psurf->u.tex.level);
   enum a6xx_format format = fd6_color_format(pfmt, tile_mode);
   uint32_t stride = fd_resource_pitch(rsc, psurf->u.tex.level);
   uint32_t array_stride = fd_resource_layer_stride(rsc, psurf->u.tex.level);
   enum a3xx_color_swap swap = fd6_color_swap(pfmt, rsc->layout.tile_mode);
   enum a3xx_msaa_samples samples = fd_msaa_samples(rsc->b.b.nr_samples);

   OUT_REG(ring,
           A6XX_RB_BLIT_DST_INFO(.tile_mode = tile_mode, .samples = samples,
                                 .color_format = format, .color_swap = swap,
                                 .flags = ubwc_enabled),
           A6XX_RB_BLIT_DST(.bo = rsc->bo, .bo_offset = offset),
           A6XX_RB_BLIT_DST_PITCH(.a6xx_rb_blit_dst_pitch = stride),
           A6XX_RB_BLIT_DST_ARRAY_PITCH(.a6xx_rb_blit_dst_array_pitch = array_stride));

   OUT_REG(ring, A6XX_RB_BLIT_BASE_GMEM(.dword = base));

   if (ubwc_enabled) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_FLAG_DST, 3);
      fd6_emit_flag_reference(ring, rsc, psurf->u.tex.level,
                              psurf->u.tex.first_layer);
   }

   fd6_emit_blit(batch, ring);
}

static void
emit_restore_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf, unsigned buffer)
{
   bool stencil = (buffer == FD_BUFFER_STENCIL);

   OUT_REG(ring, A6XX_RB_BLIT_INFO(.gmem = true, .unk0 = true,
                                   .depth = (buffer == FD_BUFFER_DEPTH),
                                   .sample_0 = util_format_is_pure_integer(
                                      psurf->format)));

   emit_blit(batch, ring, base, psurf, stencil);
}

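/*
 * Fast-clear into GMEM using the BLIT event with a clear-mask, so buffers
 * that are fully cleared can skip the mem2gmem restore entirely.
 */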
static void
emit_clears(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   enum a3xx_msaa_samples samples = fd_msaa_samples(pfb->samples);

   uint32_t buffers = batch->fast_cleared;

   if (buffers & PIPE_CLEAR_COLOR) {

      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union *color = &batch->clear_color[i];
         union util_color uc = {0};

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         enum pipe_format pfmt = pfb->cbufs[i]->format;

         // XXX I think RB_CLEAR_COLOR_DWn wants to take into account SWAP??
         union pipe_color_union swapped;
         switch (fd6_color_swap(pfmt, TILE6_LINEAR)) {
         case WZYX:
            swapped.ui[0] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[2] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case WXYZ:
            swapped.ui[2] = color->ui[0];
            swapped.ui[1] = color->ui[1];
            swapped.ui[0] = color->ui[2];
            swapped.ui[3] = color->ui[3];
            break;
         case ZYXW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[0] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[2] = color->ui[3];
            break;
         case XYZW:
            swapped.ui[3] = color->ui[0];
            swapped.ui[2] = color->ui[1];
            swapped.ui[1] = color->ui[2];
            swapped.ui[0] = color->ui[3];
            break;
         }

         util_pack_color_union(pfmt, &uc, &swapped);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                     A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                     A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
         OUT_RING(ring,
                  A6XX_RB_BLIT_INFO_GMEM | A6XX_RB_BLIT_INFO_CLEAR_MASK(0xf));

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
         OUT_RING(ring, gmem->cbuf_base[i]);

         OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
         OUT_RING(ring, 0);

         OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
         OUT_RING(ring, uc.ui[0]);
         OUT_RING(ring, uc.ui[1]);
         OUT_RING(ring, uc.ui[2]);
         OUT_RING(ring, uc.ui[3]);

         fd6_emit_blit(batch, ring);
      }
   }

   const bool has_depth = pfb->zsbuf;
   const bool has_separate_stencil =
      has_depth && fd_resource(pfb->zsbuf->texture)->stencil;

   /* First clear depth or combined depth/stencil. */
   if ((has_depth && (buffers & PIPE_CLEAR_DEPTH)) ||
       (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
      enum pipe_format pfmt = pfb->zsbuf->format;
      uint32_t clear_value;
      uint32_t mask = 0;

      if (has_separate_stencil) {
         pfmt = util_format_get_depth_only(pfb->zsbuf->format);
         clear_value = util_pack_z(pfmt, batch->clear_depth);
      } else {
         pfmt = pfb->zsbuf->format;
         clear_value =
            util_pack_z_stencil(pfmt, batch->clear_depth, batch->clear_stencil);
      }

      if (buffers & PIPE_CLEAR_DEPTH)
         mask |= 0x1;

      if (!has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL))
         mask |= 0x2;

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring,
               A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                  A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                  A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(fd6_color_format(pfmt, TILE6_LINEAR)));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                        // XXX UNK0 for separate stencil ??
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(mask));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[0]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, clear_value);

      fd6_emit_blit(batch, ring);
   }

   /* Then clear the separate stencil buffer in case of 32 bit depth
    * formats with separate stencil. */
   if (has_separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
      OUT_PKT4(ring, REG_A6XX_RB_BLIT_DST_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_DST_INFO_TILE_MODE(TILE6_LINEAR) |
                        A6XX_RB_BLIT_DST_INFO_SAMPLES(samples) |
                        A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(FMT6_8_UINT));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
      OUT_RING(ring, A6XX_RB_BLIT_INFO_GMEM |
                        // A6XX_RB_BLIT_INFO_UNK0 |
                        A6XX_RB_BLIT_INFO_DEPTH |
                        A6XX_RB_BLIT_INFO_CLEAR_MASK(0x1));

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
      OUT_RING(ring, gmem->zsbuf_base[1]);

      OUT_PKT4(ring, REG_A6XX_RB_UNKNOWN_88D0, 1);
      OUT_RING(ring, 0);

      OUT_PKT4(ring, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 1);
      OUT_RING(ring, batch->clear_stencil & 0xff);

      fd6_emit_blit(batch, ring);
   }
}

/*
 * transfer from system memory to gmem
 */
static void
emit_restore_blits(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->restore & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_restore_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }

   if (batch->restore & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->restore & FD_BUFFER_DEPTH)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->restore & FD_BUFFER_STENCIL)) {
         emit_restore_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }
}

static void
prepare_tile_setup_ib(struct fd_batch *batch)
{
   if (!(batch->restore || batch->fast_cleared))
      return;

   batch->tile_setup =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);

   set_blit_scissor(batch, batch->tile_setup);

   emit_restore_blits(batch, batch->tile_setup);
   emit_clears(batch, batch->tile_setup);
}

/*
 * transfer from system memory to gmem
 */
static void
fd6_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
}

/* before IB to rendering cmds: */
static void
fd6_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   if (!batch->tile_setup)
      return;

   trace_start_clear_restore(&batch->trace, batch->gmem, batch->fast_cleared);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_setup);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_setup);
   }
   trace_end_clear_restore(&batch->trace, batch->gmem);
}

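/*
 * Can the BLIT event handle the msaa resolve for this format, or do we need
 * to fall back to per-tile CP_BLIT (r2d) commands (see emit_resolve_blit)?
 */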
static bool
blit_can_resolve(enum pipe_format format)
{
   const struct util_format_description *desc = util_format_description(format);

   /* blit event can only do resolve for simple cases:
    * averaging samples as unsigned integers or choosing only one sample
    */
   if (util_format_is_snorm(format) || util_format_is_srgb(format))
      return false;

   /* can't do formats with larger channel sizes
    * note: this includes all float formats
    * note2: single channel integer formats seem OK
    */
   if (desc->channel[0].size > 10)
      return false;

   switch (format) {
   /* for unknown reasons blit event can't msaa resolve these formats when tiled
    * likely related to these formats having different layout from other cpp=2
    * formats
    */
   case PIPE_FORMAT_R8G8_UNORM:
   case PIPE_FORMAT_R8G8_UINT:
   case PIPE_FORMAT_R8G8_SINT:
   /* TODO: this one should be able to work? */
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return false;
   default:
      break;
   }

   return true;
}

static bool
needs_resolve(struct pipe_surface *psurf)
{
   return psurf->nr_samples &&
          (psurf->nr_samples != psurf->texture->nr_samples);
}

/**
 * Returns the UNKNOWN_8C01 value for handling partial depth/stencil
 * clear/stores to Z24S8.
 */
static uint32_t
fd6_unknown_8c01(enum pipe_format format, unsigned buffers)
{
   if (format == PIPE_FORMAT_Z24_UNORM_S8_UINT) {
      if (buffers == FD_BUFFER_DEPTH)
         return 0x08000041;
      else if (buffers == FD_BUFFER_STENCIL)
         return 0x00084001;
   }
   return 0;
}

static void
emit_resolve_blit(struct fd_batch *batch, struct fd_ringbuffer *ring,
                  uint32_t base, struct pipe_surface *psurf,
                  unsigned buffer) assert_dt
{
   uint32_t info = 0;
   bool stencil = false;

   if (!fd_resource(psurf->texture)->valid)
      return;

   /* if we need to resolve, but cannot with BLIT event, we instead need
    * to generate per-tile CP_BLIT (r2d) commands:
    *
    * The separate-stencil is a special case, we might need to use CP_BLIT
    * for depth, but we can still resolve stencil with a BLIT event
    */
   if (needs_resolve(psurf) && !blit_can_resolve(psurf->format) &&
       (buffer != FD_BUFFER_STENCIL)) {
      /* We could potentially use fd6_unknown_8c01() to handle partial z/s
       * resolve to packed z/s, but we would need a corresponding ability in the
       * !resolve case below, so batch_draw_tracking_for_dirty_bits() has us
       * just do a restore of the other channel for partial packed z/s writes.
       */
      fd6_resolve_tile(batch, ring, base, psurf, 0);
      return;
   }

   switch (buffer) {
   case FD_BUFFER_COLOR:
      break;
   case FD_BUFFER_STENCIL:
      info |= A6XX_RB_BLIT_INFO_UNK0;
      stencil = true;
      break;
   case FD_BUFFER_DEPTH:
      info |= A6XX_RB_BLIT_INFO_DEPTH;
      break;
   }

   if (util_format_is_pure_integer(psurf->format) ||
       util_format_is_depth_or_stencil(psurf->format))
      info |= A6XX_RB_BLIT_INFO_SAMPLE_0;

   OUT_PKT4(ring, REG_A6XX_RB_BLIT_INFO, 1);
   OUT_RING(ring, info);

   emit_blit(batch, ring, base, psurf, stencil);
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

static void
prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring;

   batch->tile_fini =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
   ring = batch->tile_fini;

   set_blit_scissor(batch, ring);

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[0], pfb->zsbuf,
                           FD_BUFFER_DEPTH);
      }
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL)) {
         emit_resolve_blit(batch, ring, gmem->zsbuf_base[1], pfb->zsbuf,
                           FD_BUFFER_STENCIL);
      }
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_resolve_blit(batch, ring, gmem->cbuf_base[i], pfb->cbufs[i],
                           FD_BUFFER_COLOR);
      }
   }
}

static void
fd6_emit_tile(struct fd_batch *batch, const struct fd_tile *tile)
{
   if (!use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->draw);
   } else {
      emit_conditional_ib(batch, tile, batch->draw);
   }

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);
}

static void
fd6_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;

   if (use_hw_binning(batch)) {
      OUT_PKT7(ring, CP_SET_MARKER, 1);
      OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_ENDVIS));
   }

   OUT_PKT7(ring, CP_SET_DRAW_STATE, 3);
   OUT_RING(ring, CP_SET_DRAW_STATE__0_COUNT(0) |
                     CP_SET_DRAW_STATE__0_DISABLE_ALL_GROUPS |
                     CP_SET_DRAW_STATE__0_GROUP_ID(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__1_ADDR_LO(0));
   OUT_RING(ring, CP_SET_DRAW_STATE__2_ADDR_HI(0));

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x0);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_RESOLVE));
   emit_marker6(ring, 7);

   trace_start_resolve(&batch->trace, batch->gmem);
   if (batch->fast_cleared || !use_hw_binning(batch)) {
      fd6_emit_ib(batch->gmem, batch->tile_fini);
   } else {
      emit_conditional_ib(batch, tile, batch->tile_fini);
   }
   trace_end_resolve(&batch->trace, batch->gmem);
}

static void
fd6_emit_tile_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   OUT_PKT4(ring, REG_A6XX_GRAS_LRZ_CNTL, 1);
   OUT_RING(ring, A6XX_GRAS_LRZ_CNTL_ENABLE);

   fd6_emit_lrz_flush(ring);

   fd6_event_write(batch, ring, PC_CCU_RESOLVE_TS, true);

   if (use_hw_binning(batch)) {
      check_vsc_overflow(batch->ctx);
   }
}

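/*
 * Sysmem path: there is no GMEM to clear into, so fast-clears are done with
 * the 2D blitter (fd6_clear_surface) directly to memory before drawing.
 */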
static void
emit_sysmem_clears(struct fd_batch *batch, struct fd_ringbuffer *ring) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   uint32_t buffers = batch->fast_cleared;

   if (!buffers)
      return;

   trace_start_clear_restore(&batch->trace, ring, buffers);

   if (buffers & PIPE_CLEAR_COLOR) {
      for (int i = 0; i < pfb->nr_cbufs; i++) {
         union pipe_color_union color = batch->clear_color[i];

         if (!pfb->cbufs[i])
            continue;

         if (!(buffers & (PIPE_CLEAR_COLOR0 << i)))
            continue;

         fd6_clear_surface(ctx, ring, pfb->cbufs[i], pfb->width, pfb->height,
                           &color, 0);
      }
   }
   if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
      union pipe_color_union value = {};

      const bool has_depth = pfb->zsbuf;
      struct pipe_resource *separate_stencil =
         has_depth && fd_resource(pfb->zsbuf->texture)->stencil
            ? &fd_resource(pfb->zsbuf->texture)->stencil->b.b
            : NULL;

      if ((buffers & PIPE_CLEAR_DEPTH) || (!separate_stencil && (buffers & PIPE_CLEAR_STENCIL))) {
         value.f[0] = batch->clear_depth;
         value.ui[1] = batch->clear_stencil;
         fd6_clear_surface(ctx, ring, pfb->zsbuf, pfb->width, pfb->height,
                           &value, fd6_unknown_8c01(pfb->zsbuf->format, buffers));
      }

      if (separate_stencil && (buffers & PIPE_CLEAR_STENCIL)) {
         value.ui[0] = batch->clear_stencil;

         struct pipe_surface stencil_surf = *pfb->zsbuf;
         stencil_surf.format = PIPE_FORMAT_S8_UINT;
         stencil_surf.texture = separate_stencil;

         fd6_clear_surface(ctx, ring, &stencil_surf, pfb->width, pfb->height,
                           &value, 0);
      }
   }

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd_wfi(batch, ring);

   trace_end_clear_restore(&batch->trace, ring);
}

static void
fd6_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_screen *screen = batch->ctx->screen;

   fd6_emit_restore(batch, ring);
   fd6_emit_lrz_flush(ring);

   if (batch->prologue) {
      if (!batch->nondraw) {
         trace_start_prologue(&batch->trace, ring);
      }
      fd6_emit_ib(ring, batch->prologue);
      if (!batch->nondraw) {
         trace_end_prologue(&batch->trace, ring);
      }
   }

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (pfb->width > 0 && pfb->height > 0)
      set_scissor(ring, 0, 0, pfb->width - 1, pfb->height - 1);
   else
      set_scissor(ring, 0, 0, 0, 0);

   set_window_offset(ring, 0, 0);

   set_bin_size(ring, 0, 0, 0xc00000); /* 0xc00000 = BYPASS? */

   emit_sysmem_clears(batch, ring);

   emit_marker6(ring, 7);
   OUT_PKT7(ring, CP_SET_MARKER, 1);
   OUT_RING(ring, A6XX_CP_SET_MARKER_0_MODE(RM6_BYPASS));
   emit_marker6(ring, 7);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   /* blob controls "local" in IB2, but I think that is not required */
   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_LOCAL, 1);
   OUT_RING(ring, 0x1);

   fd6_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);
   fd6_cache_inv(batch, ring);

   fd_wfi(batch, ring);
   OUT_REG(ring, A6XX_RB_CCU_CNTL(.color_offset = screen->ccu_offset_bypass));

   /* enable stream-out, with sysmem there is only one pass: */
   OUT_REG(ring, A6XX_VPC_SO_DISABLE(false));

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb, NULL);
   emit_msaa(ring, pfb->samples);
   patch_fb_read_sysmem(batch);

   update_render_cntl(batch, pfb, false);

   emit_common_init(batch);
}

static void
fd6_emit_sysmem_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   emit_common_fini(batch);

   if (batch->epilogue)
      fd6_emit_ib(batch->gmem, batch->epilogue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd6_emit_lrz_flush(ring);

   fd6_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd6_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
   fd_wfi(batch, ring);
}

void
fd6_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd6_emit_tile_init;
   ctx->emit_tile_prep = fd6_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd6_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd6_emit_tile_renderprep;
   ctx->emit_tile = fd6_emit_tile;
   ctx->emit_tile_gmem2mem = fd6_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd6_emit_tile_fini;
   ctx->emit_sysmem_prep = fd6_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd6_emit_sysmem_fini;
}