/*
 * Copyright (C) 2016 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "fd5_context.h"
#include "fd5_draw.h"
#include "fd5_emit.h"
#include "fd5_format.h"
#include "fd5_gmem.h"
#include "fd5_program.h"
#include "fd5_zsa.h"

static void
emit_mrt(struct fd_ringbuffer *ring, unsigned nr_bufs,
         struct pipe_surface **bufs, const struct fd_gmem_stateobj *gmem)
{
   enum a5xx_tile_mode tile_mode;
   unsigned i;

   for (i = 0; i < A5XX_MAX_RENDER_TARGETS; i++) {
      enum a5xx_color_fmt format = 0;
      enum a3xx_color_swap swap = WZYX;
      bool srgb = false, sint = false, uint = false;
      struct fd_resource *rsc = NULL;
      uint32_t stride = 0;
      uint32_t size = 0;
      uint32_t base = 0;
      uint32_t offset = 0;

      if (gmem) {
         tile_mode = TILE5_2;
      } else {
         tile_mode = TILE5_LINEAR;
      }

      if ((i < nr_bufs) && bufs[i]) {
         struct pipe_surface *psurf = bufs[i];
         enum pipe_format pformat = psurf->format;

         rsc = fd_resource(psurf->texture);

         format = fd5_pipe2color(pformat);
         swap = fd5_pipe2swap(pformat);
         srgb = util_format_is_srgb(pformat);
         sint = util_format_is_pure_sint(pformat);
         uint = util_format_is_pure_uint(pformat);

         assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

         offset = fd_resource_offset(rsc, psurf->u.tex.level,
                                     psurf->u.tex.first_layer);

         if (gmem) {
            stride = gmem->bin_w * gmem->cbuf_cpp[i];
            size = stride * gmem->bin_h;
            base = gmem->cbuf_base[i];
         } else {
            stride = fd_resource_pitch(rsc, psurf->u.tex.level);
            size = fd_resource_layer_stride(rsc, psurf->u.tex.level);

            tile_mode =
               fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);
         }
      }

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(i), 5);
      OUT_RING(
         ring,
         A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
            A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(tile_mode) |
            A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(swap) |
            COND(gmem,
                 0x800) | /* XXX 0x1000 for RECTLIST clear, 0x0 for BLIT.. */
            COND(srgb, A5XX_RB_MRT_BUF_INFO_COLOR_SRGB));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(stride));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(size));
      if (gmem || (i >= nr_bufs) || !bufs[i]) {
         OUT_RING(ring, base);       /* RB_MRT[i].BASE_LO */
         OUT_RING(ring, 0x00000000); /* RB_MRT[i].BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* BASE_LO/HI */
      }

      OUT_PKT4(ring, REG_A5XX_SP_FS_MRT_REG(i), 1);
      OUT_RING(ring, A5XX_SP_FS_MRT_REG_COLOR_FORMAT(format) |
                        COND(sint, A5XX_SP_FS_MRT_REG_COLOR_SINT) |
                        COND(uint, A5XX_SP_FS_MRT_REG_COLOR_UINT) |
                        COND(srgb, A5XX_SP_FS_MRT_REG_COLOR_SRGB));

      /* when we support UBWC, these would be the system memory
       * addr/pitch/etc:
       */
      OUT_PKT4(ring, REG_A5XX_RB_MRT_FLAG_BUFFER(i), 4);
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_LO */
      OUT_RING(ring, 0x00000000); /* RB_MRT_FLAG_BUFFER[i].ADDR_HI */
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_PITCH(0));
      OUT_RING(ring, A5XX_RB_MRT_FLAG_BUFFER_ARRAY_PITCH(0));
   }
}

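/* Emit depth/stencil buffer state.  Like emit_mrt(), a non-NULL gmem
 * state programs base/pitch for the bin footprint in the tile buffer
 * (stride = cpp * bin_w, size = stride * bin_h; e.g. a 32x32 bin of a
 * 4 cpp depth format gives a 128 byte pitch and 4096 bytes per bin),
 * while NULL points at the resource in system memory.
 */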
static void
emit_zs(struct fd_ringbuffer *ring, struct pipe_surface *zsbuf,
        const struct fd_gmem_stateobj *gmem)
{
   if (zsbuf) {
      struct fd_resource *rsc = fd_resource(zsbuf->texture);
      enum a5xx_depth_format fmt = fd5_pipe2depth(zsbuf->format);
      uint32_t cpp = rsc->layout.cpp;
      uint32_t stride = 0;
      uint32_t size = 0;

      if (gmem) {
         stride = cpp * gmem->bin_w;
         size = stride * gmem->bin_h;
      } else {
         stride = fd_resource_pitch(rsc, zsbuf->u.tex.level);
         size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
      }

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));
      if (gmem) {
         OUT_RING(ring, gmem->zsbuf_base[0]); /* RB_DEPTH_BUFFER_BASE_LO */
         OUT_RING(ring, 0x00000000);          /* RB_DEPTH_BUFFER_BASE_HI */
      } else {
         OUT_RELOC(ring, rsc->bo,
                   fd_resource_offset(rsc, zsbuf->u.tex.level, zsbuf->u.tex.first_layer),
                   0, 0); /* RB_DEPTH_BUFFER_BASE_LO/HI */
      }
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_PITCH(stride));
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_ARRAY_PITCH(size));

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(fmt));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      if (rsc->lrz) {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RELOC(ring, rsc->lrz, 0x1000, 0, 0);
         OUT_RING(ring, A5XX_GRAS_LRZ_BUFFER_PITCH(rsc->lrz_pitch));

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RELOC(ring, rsc->lrz, 0, 0, 0);
      } else {
         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_BUFFER_BASE_LO, 3);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000); /* GRAS_LRZ_BUFFER_PITCH */

         OUT_PKT4(ring, REG_A5XX_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO, 2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x00000000);
      }

      if (rsc->stencil) {
         if (gmem) {
            stride = 1 * gmem->bin_w;
            size = stride * gmem->bin_h;
         } else {
            stride = fd_resource_pitch(rsc->stencil, zsbuf->u.tex.level);
            size = fd_resource_layer_stride(rsc, zsbuf->u.tex.level);
         }

         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 5);
         OUT_RING(ring, A5XX_RB_STENCIL_INFO_SEPARATE_STENCIL);
         if (gmem) {
            OUT_RING(ring, gmem->zsbuf_base[1]); /* RB_STENCIL_BASE_LO */
            OUT_RING(ring, 0x00000000);          /* RB_STENCIL_BASE_HI */
         } else {
            OUT_RELOC(ring, rsc->stencil->bo,
                      fd_resource_offset(rsc->stencil, zsbuf->u.tex.level, zsbuf->u.tex.first_layer),
                      0, 0); /* RB_STENCIL_BASE_LO/HI */
         }
         OUT_RING(ring, A5XX_RB_STENCIL_PITCH(stride));
         OUT_RING(ring, A5XX_RB_STENCIL_ARRAY_PITCH(size));
      } else {
         OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
         OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
      }
   } else {
      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_BUFFER_INFO, 5);
      OUT_RING(ring, A5XX_RB_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_PITCH */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_BUFFER_ARRAY_PITCH */

      OUT_PKT4(ring, REG_A5XX_GRAS_SU_DEPTH_BUFFER_INFO, 1);
      OUT_RING(ring, A5XX_GRAS_SU_DEPTH_BUFFER_INFO_DEPTH_FORMAT(DEPTH5_NONE));

      OUT_PKT4(ring, REG_A5XX_RB_DEPTH_FLAG_BUFFER_BASE_LO, 3);
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_LO */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_BASE_HI */
      OUT_RING(ring, 0x00000000); /* RB_DEPTH_FLAG_BUFFER_PITCH */

      OUT_PKT4(ring, REG_A5XX_RB_STENCIL_INFO, 1);
      OUT_RING(ring, 0x00000000); /* RB_STENCIL_INFO */
   }
}

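/* Program MSAA sample counts consistently across the TPL1, RB, and
 * GRAS blocks; MSAA_ONE additionally sets each block's MSAA_DISABLE
 * bit.
 */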
static void
emit_msaa(struct fd_ringbuffer *ring, uint32_t nr_samples)
{
   enum a3xx_msaa_samples samples = fd_msaa_samples(nr_samples);

   OUT_PKT4(ring, REG_A5XX_TPL1_TP_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_TPL1_TP_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_TPL1_TP_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_TPL1_TP_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_RB_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_RB_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring,
            A5XX_RB_DEST_MSAA_CNTL_SAMPLES(samples) |
               COND(samples == MSAA_ONE, A5XX_RB_DEST_MSAA_CNTL_MSAA_DISABLE));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_RAS_MSAA_CNTL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_RAS_MSAA_CNTL_SAMPLES(samples));
   OUT_RING(ring, A5XX_GRAS_SC_DEST_MSAA_CNTL_SAMPLES(samples) |
                     COND(samples == MSAA_ONE,
                          A5XX_GRAS_SC_DEST_MSAA_CNTL_MSAA_DISABLE));
}

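/* Decide whether the hw binning pass is usable for this batch: besides
 * the scissor workaround below, bail when the bin/pipe dimensions
 * exceed what the VSC pipe config can presumably encode, or when there
 * is nothing to draw.
 */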
static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   /* workaround: Like on a3xx, hw binning and scissor optimization
    * don't play nice together.
    *
    * Disable binning if scissor optimization is used.
    */
   if (gmem->minx || gmem->miny)
      return false;

   if ((gmem->maxpw * gmem->maxph) > 32)
      return false;

   if ((gmem->maxpw > 15) || (gmem->maxph > 15))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2) &&
          (batch->num_draws > 0);
}

static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   unsigned i;
   for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
      *patch->cs = patch->val | DRAW4(0, 0, 0, vismode);
   }
   util_dynarray_clear(&batch->draw_patches);
}

static void
update_vsc_pipe(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   int i;

   OUT_PKT4(ring, REG_A5XX_VSC_BIN_SIZE, 3);
   OUT_RING(ring, A5XX_VSC_BIN_SIZE_WIDTH(gmem->bin_w) |
                     A5XX_VSC_BIN_SIZE_HEIGHT(gmem->bin_h));
   OUT_RELOC(ring, fd5_ctx->vsc_size_mem, 0, 0, 0); /* VSC_SIZE_ADDRESS_LO/HI */

   OUT_PKT4(ring, REG_A5XX_UNKNOWN_0BC5, 2);
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC5 */
   OUT_RING(ring, 0x00000000); /* UNKNOWN_0BC6 */

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_CONFIG_REG(0), 16);
   for (i = 0; i < 16; i++) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
      OUT_RING(ring, A5XX_VSC_PIPE_CONFIG_REG_X(pipe->x) |
                        A5XX_VSC_PIPE_CONFIG_REG_Y(pipe->y) |
                        A5XX_VSC_PIPE_CONFIG_REG_W(pipe->w) |
                        A5XX_VSC_PIPE_CONFIG_REG_H(pipe->h));
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_ADDRESS_LO(0), 32);
   for (i = 0; i < 16; i++) {
      if (!ctx->vsc_pipe_bo[i]) {
         ctx->vsc_pipe_bo[i] =
            fd_bo_new(ctx->dev, 0x20000, 0, "vsc_pipe[%u]", i);
      }
      OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0,
                0); /* VSC_PIPE_DATA_ADDRESS[i].LO/HI */
   }

   OUT_PKT4(ring, REG_A5XX_VSC_PIPE_DATA_LENGTH_REG(0), 16);
   for (i = 0; i < 16; i++) {
      OUT_RING(ring, fd_bo_size(ctx->vsc_pipe_bo[i]) -
                        32); /* VSC_PIPE_DATA_LENGTH[i] */
   }
}

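/* Binning pass: replay the batch's draws at bin granularity with the
 * VPC in binning mode, filling the per-pipe visibility streams that
 * per-tile rendering later consumes via CP_SET_BIN_DATA5.
 */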
static void
emit_binning_pass(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   uint32_t x1 = gmem->minx;
   uint32_t y1 = gmem->miny;
   uint32_t x2 = gmem->minx + gmem->width - 1;
   uint32_t y2 = gmem->miny + gmem->height - 1;

   fd5_set_render_mode(batch->ctx, ring, BINNING);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   update_vsc_pipe(batch);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, A5XX_VPC_MODE_CNTL_BINNING_PASS);

   fd5_event_write(batch, ring, UNK_2C, false);

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* emit IB to binning drawcmds: */
   fd5_emit_ib(ring, batch->binning);

   fd_reset_wfi(batch);

   fd5_event_write(batch, ring, UNK_2D, false);

   fd5_event_write(batch, ring, CACHE_FLUSH_TS, true);

   // TODO CP_COND_WRITE's for all the vsc buffers (check for overflow??)

   fd_wfi(batch, ring);

   OUT_PKT4(ring, REG_A5XX_VPC_MODE_CNTL, 1);
   OUT_RING(ring, 0x0);
}

/* before first tile */
static void
fd5_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   fd5_emit_restore(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   fd5_emit_lrz_flush(batch, ring);

   OUT_PKT4(ring, REG_A5XX_GRAS_CL_CNTL, 1);
   OUT_RING(ring, 0x00000080); /* GRAS_CL_CNTL */

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x7c13c080); /* RB_CCU_CNTL */

   emit_zs(ring, pfb->zsbuf, batch->gmem_state);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, batch->gmem_state);

   /* Enable stream output for the first pass (likely the binning). */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   if (use_hw_binning(batch)) {
      emit_binning_pass(batch);

      /* Disable stream output after binning, since each VS output should get
       * streamed out once.
       */
      OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
      OUT_RING(ring, A5XX_VPC_SO_OVERRIDE_SO_DISABLE);

      fd5_emit_lrz_flush(batch, ring);
      patch_draws(batch, USE_VISIBILITY);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   fd5_set_render_mode(batch->ctx, ring, GMEM);

   /* XXX If we're in gmem mode but not doing HW binning, then after the first
    * tile we should disable stream output (fd6_gmem.c doesn't do that either).
    */
}

/* before mem2gmem */
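/* Per-tile setup: program the window scissor/resolve range for this
 * tile, and either point the CP at the tile's visibility stream (hw
 * binning) or override visibility so every draw is executed.
 */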
static void
fd5_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd5_context *fd5_ctx = fd5_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;

   uint32_t x1 = tile->xoff;
   uint32_t y1 = tile->yoff;
   uint32_t x2 = tile->xoff + tile->bin_w - 1;
   uint32_t y2 = tile->yoff + tile->bin_h - 1;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(x1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(y1));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(x2) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(y2));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(x1) | A5XX_RB_RESOLVE_CNTL_1_Y(y1));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(x2) | A5XX_RB_RESOLVE_CNTL_2_Y(y2));

   if (use_hw_binning(batch)) {
      const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[tile->p];
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT7(ring, CP_WAIT_FOR_ME, 0);

      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x0);

      OUT_PKT7(ring, CP_SET_BIN_DATA5, 5);
      OUT_RING(ring, CP_SET_BIN_DATA5_0_VSC_SIZE(pipe->w * pipe->h) |
                        CP_SET_BIN_DATA5_0_VSC_N(tile->n));
      OUT_RELOC(ring, pipe_bo, 0, 0, 0);     /* VSC_PIPE[p].DATA_ADDRESS */
      OUT_RELOC(ring, fd5_ctx->vsc_size_mem, /* VSC_SIZE_ADDRESS + (p * 4) */
                (tile->p * 4), 0, 0);
   } else {
      OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
      OUT_RING(ring, 0x1);
   }

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(x1) | A5XX_RB_WINDOW_OFFSET_Y(y1));
}

/*
 * transfer from system memory to gmem
 */

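/* Restore (mem2gmem) a single surface into the tile buffer: the blit
 * reads through MRT state pointing at system memory and writes to the
 * GMEM offset given in 'base'.
 */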
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t stride, size;

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   if ((buf == BLIT_ZS) || (buf == BLIT_S)) {
      // XXX hack import via BLIT_MRT0 instead of BLIT_ZS, since I don't
      // know otherwise how to go from linear in sysmem to tiled in gmem.
      // possibly we want to flip this around gmem2mem and keep depth
      // tiled in sysmem (and fixup sampler state to assume tiled).. this
      // might be required for doing depth/stencil in bypass mode?
      enum a5xx_color_fmt format =
         fd5_pipe2color(fd_gmem_restore_format(rsc->b.b.format));

      OUT_PKT4(ring, REG_A5XX_RB_MRT_BUF_INFO(0), 5);
      OUT_RING(ring,
               A5XX_RB_MRT_BUF_INFO_COLOR_FORMAT(format) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_TILE_MODE(rsc->layout.tile_mode) |
                  A5XX_RB_MRT_BUF_INFO_COLOR_SWAP(WZYX));
      OUT_RING(ring, A5XX_RB_MRT_PITCH(fd_resource_pitch(rsc, psurf->u.tex.level)));
      OUT_RING(ring, A5XX_RB_MRT_ARRAY_PITCH(fd_resource_layer_stride(rsc, psurf->u.tex.level)));
      OUT_RELOC(ring, rsc->bo,
                fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer),
                0, 0); /* BASE_LO/HI */

      buf = BLIT_MRT0;
   }

   stride = gmem->bin_w << fdl_cpp_shift(&rsc->layout);
   size = stride * gmem->bin_h;

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000000); /* RB_RESOLVE_CNTL_3 */
   OUT_RING(ring, base);       /* RB_BLIT_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_DST_HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(stride));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(size));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   fd5_emit_blit(batch, ring);
}

static void
fd5_emit_tile_mem2gmem(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   /*
    * setup mrt and zs with system memory base addresses:
    */

   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   // emit_zs(ring, pfb->zsbuf, NULL);

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(gmem->bin_w) |
                     A5XX_RB_CNTL_HEIGHT(gmem->bin_h) | A5XX_RB_CNTL_BYPASS);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR)) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->restore & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_mem2gmem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }

   if (fd_gmem_needs_restore(batch, tile,
                             FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && fd_gmem_needs_restore(batch, tile, FD_BUFFER_STENCIL))
         emit_mem2gmem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }
}

/* before IB to rendering cmds: */
static void
fd5_emit_tile_renderprep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring,
            A5XX_RB_CNTL_WIDTH(gmem->bin_w) | A5XX_RB_CNTL_HEIGHT(gmem->bin_h));

   emit_zs(ring, pfb->zsbuf, gmem);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, gmem);
   emit_msaa(ring, pfb->samples);
}

/*
 * transfer from gmem to system memory (ie. normal RAM)
 */

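/* Resolve (gmem2mem) a single surface from the tile buffer back to its
 * backing bo in system memory; skipped when the resource has no valid
 * contents to write back.
 */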
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf, enum a5xx_blit_buf buf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   bool tiled;
   uint32_t offset, pitch;

   if (!rsc->valid)
      return;

   if (buf == BLIT_S)
      rsc = rsc->stencil;

   offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   pitch = fd_resource_pitch(rsc, psurf->u.tex.level);

   assert(psurf->u.tex.first_layer == psurf->u.tex.last_layer);

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_FLAG_DST_LO, 4);
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_LO */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_HI */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_PITCH */
   OUT_RING(ring, 0x00000000); /* RB_BLIT_FLAG_DST_ARRAY_PITCH */

   tiled = fd_resource_tile_mode(psurf->texture, psurf->u.tex.level);

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_3, 5);
   OUT_RING(ring, 0x00000004 | /* XXX RB_RESOLVE_CNTL_3 */
                     COND(tiled, A5XX_RB_RESOLVE_CNTL_3_TILED));
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_BLIT_DST_LO/HI */
   OUT_RING(ring, A5XX_RB_BLIT_DST_PITCH(pitch));
   OUT_RING(ring, A5XX_RB_BLIT_DST_ARRAY_PITCH(fd_resource_layer_stride(rsc, psurf->u.tex.level)));

   OUT_PKT4(ring, REG_A5XX_RB_BLIT_CNTL, 1);
   OUT_RING(ring, A5XX_RB_BLIT_CNTL_BUF(buf));

   // bool msaa_resolve = pfb->samples > 1;
   bool msaa_resolve = false;
   OUT_PKT4(ring, REG_A5XX_RB_CLEAR_CNTL, 1);
   OUT_RING(ring, COND(msaa_resolve, A5XX_RB_CLEAR_CNTL_MSAA_RESOLVE));

   fd5_emit_blit(batch, ring);
}

static void
fd5_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL)) {
      struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);

      if (!rsc->stencil || (batch->resolve & FD_BUFFER_DEPTH))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf, BLIT_ZS);
      if (rsc->stencil && (batch->resolve & FD_BUFFER_STENCIL))
         emit_gmem2mem_surf(batch, gmem->zsbuf_base[1], pfb->zsbuf, BLIT_S);
   }

   if (batch->resolve & FD_BUFFER_COLOR) {
      unsigned i;
      for (i = 0; i < pfb->nr_cbufs; i++) {
         if (!pfb->cbufs[i])
            continue;
         if (!(batch->resolve & (PIPE_CLEAR_COLOR0 << i)))
            continue;
         emit_gmem2mem_surf(batch, gmem->cbuf_base[i], pfb->cbufs[i],
                            BLIT_MRT0 + i);
      }
   }
}

static void
fd5_emit_tile_fini(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_cache_flush(batch, ring);
   fd5_set_render_mode(batch->ctx, ring, BYPASS);
}

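/* Setup for sysmem (bypass) rendering: no tile buffer involved, so the
 * window scissor covers the whole framebuffer and MRT/ZS state points
 * directly at the resources in system memory.
 */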
static void
fd5_emit_sysmem_prep(struct fd_batch *batch) assert_dt
{
   struct fd_ringbuffer *ring = batch->gmem;

   fd5_emit_restore(batch, ring);

   fd5_emit_lrz_flush(batch, ring);

   if (batch->prologue)
      fd5_emit_ib(ring, batch->prologue);

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_event_write(batch, ring, PC_CCU_INVALIDATE_COLOR, false);

   OUT_PKT4(ring, REG_A5XX_PC_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* PC_POWER_CNTL */

   OUT_PKT4(ring, REG_A5XX_VFD_POWER_CNTL, 1);
   OUT_RING(ring, 0x00000003); /* VFD_POWER_CNTL */

   /* 0x10000000 for BYPASS.. 0x7c13c080 for GMEM: */
   fd_wfi(batch, ring);
   OUT_PKT4(ring, REG_A5XX_RB_CCU_CNTL, 1);
   OUT_RING(ring, 0x10000000); /* RB_CCU_CNTL */

   OUT_PKT4(ring, REG_A5XX_RB_CNTL, 1);
   OUT_RING(ring, A5XX_RB_CNTL_WIDTH(0) | A5XX_RB_CNTL_HEIGHT(0) |
                     A5XX_RB_CNTL_BYPASS);

   /* remaining setup below here does not apply to blit/compute: */
   if (batch->nondraw)
      return;

   struct pipe_framebuffer_state *pfb = &batch->framebuffer;

   OUT_PKT4(ring, REG_A5XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_TL_X(0) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(0));
   OUT_RING(ring, A5XX_GRAS_SC_WINDOW_SCISSOR_BR_X(pfb->width - 1) |
                     A5XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_RESOLVE_CNTL_1, 2);
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_1_X(0) | A5XX_RB_RESOLVE_CNTL_1_Y(0));
   OUT_RING(ring, A5XX_RB_RESOLVE_CNTL_2_X(pfb->width - 1) |
                     A5XX_RB_RESOLVE_CNTL_2_Y(pfb->height - 1));

   OUT_PKT4(ring, REG_A5XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A5XX_RB_WINDOW_OFFSET_X(0) | A5XX_RB_WINDOW_OFFSET_Y(0));

   /* Enable stream output, since there's no binning pass to put it in. */
   OUT_PKT4(ring, REG_A5XX_VPC_SO_OVERRIDE, 1);
   OUT_RING(ring, 0);

   OUT_PKT7(ring, CP_SET_VISIBILITY_OVERRIDE, 1);
   OUT_RING(ring, 0x1);

   patch_draws(batch, IGNORE_VISIBILITY);

   emit_zs(ring, pfb->zsbuf, NULL);
   emit_mrt(ring, pfb->nr_cbufs, pfb->cbufs, NULL);
   emit_msaa(ring, pfb->samples);
}

static void
fd5_emit_sysmem_fini(struct fd_batch *batch)
{
   struct fd_ringbuffer *ring = batch->gmem;

   OUT_PKT7(ring, CP_SKIP_IB2_ENABLE_GLOBAL, 1);
   OUT_RING(ring, 0x0);

   fd5_emit_lrz_flush(batch, ring);

   fd5_event_write(batch, ring, PC_CCU_FLUSH_COLOR_TS, true);
   fd5_event_write(batch, ring, PC_CCU_FLUSH_DEPTH_TS, true);
}

void
fd5_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_tile_init = fd5_emit_tile_init;
   ctx->emit_tile_prep = fd5_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd5_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd5_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd5_emit_tile_gmem2mem;
   ctx->emit_tile_fini = fd5_emit_tile_fini;
   ctx->emit_sysmem_prep = fd5_emit_sysmem_prep;
   ctx->emit_sysmem_fini = fd5_emit_sysmem_fini;
}