/*
 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "util/u_string.h"

#include "freedreno_draw.h"
#include "freedreno_resource.h"
#include "freedreno_state.h"

#include "ir2/instr-a2xx.h"
#include "fd2_context.h"
#include "fd2_draw.h"
#include "fd2_emit.h"
#include "fd2_gmem.h"
#include "fd2_program.h"
#include "fd2_util.h"
#include "fd2_zsa.h"

static uint32_t
fmt2swap(enum pipe_format format)
{
   switch (format) {
   case PIPE_FORMAT_B8G8R8A8_UNORM:
   case PIPE_FORMAT_B8G8R8X8_UNORM:
   case PIPE_FORMAT_B5G6R5_UNORM:
   case PIPE_FORMAT_B5G5R5A1_UNORM:
   case PIPE_FORMAT_B5G5R5X1_UNORM:
   case PIPE_FORMAT_B4G4R4A4_UNORM:
   case PIPE_FORMAT_B4G4R4X4_UNORM:
   case PIPE_FORMAT_B2G3R3_UNORM:
      return 1;
   default:
      return 0;
   }
}

static bool
use_hw_binning(struct fd_batch *batch)
{
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;

   /* we hardcoded a limit of 8 "pipes"; we can increase this limit
    * at the cost of a slightly larger command stream,
    * however very few cases will need more than 8
    * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
    */
   if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
      return false;

   /* only a20x hw binning is implemented
    * a22x is more like a3xx, but perhaps the a20x works? (TODO)
    */
   if (!is_a20x(batch->ctx->screen))
      return false;

   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
}

/* transfer from gmem to system memory (ie. normal RAM) */
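
/* emit_gmem2mem_surf() resolves a single surface from GMEM back to its
 * backing bo: RB_COLOR_INFO is pointed at the GMEM base offset, the
 * RB_COPY_DEST_* registers describe the destination in system memory,
 * and a RECTLIST draw performs the copy (with the RB in EDRAM_COPY mode,
 * set up by the caller).
 */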
static void
emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf)
{
   struct fd_ringbuffer *ring = batch->tile_fini;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   enum pipe_format format = fd_gmem_restore_format(psurf->format);
   uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);

   assert((pitch & 31) == 0);
   assert((offset & 0xfff) == 0);

   if (!rsc->valid)
      return;

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
   OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
   OUT_RING(ring, pitch >> 5);             /* RB_COPY_DEST_PITCH */
   OUT_RING(ring,                          /* RB_COPY_DEST_INFO */
            A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(format)) |
               COND(!rsc->layout.tile_mode, A2XX_RB_COPY_DEST_INFO_LINEAR) |
               A2XX_RB_COPY_DEST_INFO_WRITE_RED |
               A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
               A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
               A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);

   if (!is_a20x(batch->ctx->screen)) {
      OUT_WFI(ring);

      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
      OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
      OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
   }

   fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}
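
/* prepare_tile_fini_ib() builds the gmem2mem resolve commands once, into a
 * small streaming ringbuffer (batch->tile_fini); fd2_emit_tile_gmem2mem()
 * then simply replays it as an IB for every tile.
 */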
static void
prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct fd_ringbuffer *ring;

   batch->tile_fini =
      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
   ring = batch->tile_fini;

   fd2_emit_vertex_bufs(ring, 0x9c,
                        (struct fd2_vertex_buf[]){
                           {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
                        },
                        1);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
   OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
   OUT_RING(ring, 0);

   if (!is_a20x(ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
      OUT_RING(ring, 0x0000028f);
   }

   fd2_program_emit(ctx, ring, &ctx->solid_prog);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
   OUT_RING(ring, 0x0000ffff);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
   OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
   OUT_RING(
      ring,
      A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
         A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
         A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
   OUT_RING(ring, xy2d(0, 0));                    /* PA_SC_WINDOW_SCISSOR_TL */
   OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
   OUT_RING(ring, 0x00000000);

   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
   OUT_RING(ring, fui((float)gmem->bin_w / 2.0f)); /* XSCALE */
   OUT_RING(ring, fui((float)gmem->bin_w / 2.0f)); /* XOFFSET */
   OUT_RING(ring, fui((float)gmem->bin_h / 2.0f)); /* YSCALE */
   OUT_RING(ring, fui((float)gmem->bin_h / 2.0f)); /* YOFFSET */

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
   OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));

   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
      emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);

   if (batch->resolve & FD_BUFFER_COLOR)
      emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
   OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));

   if (!is_a20x(ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
      OUT_RING(ring, 0x0000003b);
   }
}

static void
fd2_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
{
   fd2_emit_ib(batch->gmem, batch->tile_fini);
}

/* transfer from system memory to gmem */
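
/* emit_mem2gmem_surf() restores a single surface into GMEM: the system
 * memory copy is bound as a point-sampled texture and a RECTLIST covering
 * the tile is drawn (with the blit program set up by the caller) to fill
 * GMEM with its contents.
 */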
static void
emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
                   struct pipe_surface *psurf)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   enum pipe_format format = fd_gmem_restore_format(psurf->format);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

   /* emit fb as a texture: */
   OUT_PKT3(ring, CP_SET_CONSTANT, 7);
   OUT_RING(ring, 0x00010000);
   OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
                     A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
                     A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
                     A2XX_SQ_TEX_0_PITCH(
                        fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level)));
   OUT_RELOC(ring, rsc->bo, offset,
             A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(format).format) |
                A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL),
             0);
   OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
                     A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
   OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
                     A2XX_SQ_TEX_3_SWIZ_X(0) | A2XX_SQ_TEX_3_SWIZ_Y(1) |
                     A2XX_SQ_TEX_3_SWIZ_Z(2) | A2XX_SQ_TEX_3_SWIZ_W(3) |
                     A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
                     A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));

   if (!is_a20x(batch->ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
      OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
      OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
   }

   fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
}

static void
fd2_emit_tile_mem2gmem(struct fd_batch *batch,
                       const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   unsigned bin_w = tile->bin_w;
   unsigned bin_h = tile->bin_h;
   float x0, y0, x1, y1;

   fd2_emit_vertex_bufs(
      ring, 0x9c,
      (struct fd2_vertex_buf[]){
         {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
         {.prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36},
      },
      2);

   /* write texture coordinates to vertexbuf: */
   x0 = ((float)tile->xoff) / ((float)pfb->width);
   x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
   y0 = ((float)tile->yoff) / ((float)pfb->height);
   y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
   OUT_PKT3(ring, CP_MEM_WRITE, 7);
   OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
   OUT_RING(ring, fui(x0));
   OUT_RING(ring, fui(y0));
   OUT_RING(ring, fui(x1));
   OUT_RING(ring, fui(y0));
   OUT_RING(ring, fui(x0));
   OUT_RING(ring, fui(y1));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
   OUT_RING(ring, 0);

   fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);

   OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
   OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
   OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
   OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
                     A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
                     A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
   OUT_RING(ring, 0x0000ffff);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
   OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
                     A2XX_RB_COLORCONTROL_BLEND_DISABLE |
                     A2XX_RB_COLORCONTROL_ROP_CODE(12) |
                     A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
                     A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
   OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
                     A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
                     A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
                     A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
                     A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
                     A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
   OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
                     xy2d(0, 0));      /* PA_SC_WINDOW_SCISSOR_TL */
   OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */

   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
   OUT_RING(ring, fui((float)bin_w / 2.0f));  /* PA_CL_VPORT_XSCALE */
   OUT_RING(ring, fui((float)bin_w / 2.0f));  /* PA_CL_VPORT_XOFFSET */
   OUT_RING(ring, fui(-(float)bin_h / 2.0f)); /* PA_CL_VPORT_YSCALE */
   OUT_RING(ring, fui((float)bin_h / 2.0f));  /* PA_CL_VPORT_YOFFSET */

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
   OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
                     A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
                     A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
   OUT_RING(ring, 0x00000000);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
      emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);

   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
      emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
   OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
                     A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
                     A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);

   /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
}
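
/* patch_draws() fixes up the recorded draw packets once it is known whether
 * hw binning/visibility will be used: on non-a20x parts the DRAW initiator
 * dword is patched with the vismode (same as a3xx); on a20x, when visibility
 * is not used, the recorded CP_DRAW_INDX_BIN packets are rewritten into plain
 * CP_DRAW_INDX packets.
 */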
static void
patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
{
   unsigned i;

   if (!is_a20x(batch->ctx->screen)) {
      /* identical to a3xx */
      for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
         struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
         *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
      }
      util_dynarray_clear(&batch->draw_patches);
      return;
   }

   if (vismode == USE_VISIBILITY)
      return;

   for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t *); i++) {
      uint32_t *ptr =
         *util_dynarray_element(&batch->draw_patches, uint32_t *, i);
      unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */

      /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
       * replace first two DWORDS with NOP and move the rest down
       * (we don't want to have to move the idx buffer reloc)
       */
      ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
      ptr[1] = 0x00000000;

      ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
      ptr[2] = CP_TYPE3_PKT | ((cnt - 2) << 16) | (CP_DRAW_INDX << 8);
      ptr[3] = 0x00000000;
   }
}

static void
fd2_emit_sysmem_prep(struct fd_batch *batch)
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   struct pipe_surface *psurf = pfb->cbufs[0];

   if (!psurf)
      return;

   struct fd_resource *rsc = fd_resource(psurf->texture);
   uint32_t offset =
      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
   uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);

   assert((pitch & 31) == 0);
   assert((offset & 0xfff) == 0);

   fd2_emit_restore(ctx, ring);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
   OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(pitch));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RELOC(ring, rsc->bo, offset,
             COND(!rsc->layout.tile_mode, A2XX_RB_COLOR_INFO_LINEAR) |
                A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
                A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)),
             0);

   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
   OUT_RING(ring,
            A2XX_PA_SC_WINDOW_OFFSET_X(0) | A2XX_PA_SC_WINDOW_OFFSET_Y(0));

   patch_draws(batch, IGNORE_VISIBILITY);
   util_dynarray_clear(&batch->draw_patches);
   util_dynarray_clear(&batch->shader_patches);
}

/* before first tile */
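/* fd2_emit_tile_init() emits the per-batch GMEM setup: the RB surface/color/
 * depth info for the bin layout, the fast-clear and restore-info patches
 * recorded during draws, and (when hw binning is used) the binning pass
 * itself.
 */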
static void
fd2_emit_tile_init(struct fd_batch *batch) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
   uint32_t reg;

   fd2_emit_restore(ctx, ring);

   prepare_tile_fini_ib(batch);

   OUT_PKT3(ring, CP_SET_CONSTANT, 4);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
   OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
   OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
   reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
   if (pfb->zsbuf)
      reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
   OUT_RING(ring, reg); /* RB_DEPTH_INFO */

   /* fast clear patches */
   int depth_size = -1;
   int color_size = -1;

   if (pfb->cbufs[0])
      color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;

   if (pfb->zsbuf)
      depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;

   for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
      struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
      uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
      uint32_t size, lines;

      /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
      switch (patch->val) {
      case GMEM_PATCH_FASTCLEAR_COLOR:
         size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
         lines = size / 1024;
         depth_base = size / 2;
         break;
      case GMEM_PATCH_FASTCLEAR_DEPTH:
         size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
         lines = size / 1024;
         color_base = depth_base;
         depth_base = depth_base + size / 2;
         break;
      case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
         lines =
            align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
         break;
      case GMEM_PATCH_RESTORE_INFO:
         patch->cs[0] = gmem->bin_w;
         patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
                        A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
         patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
         if (pfb->zsbuf)
            patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(
               fd_pipe2depth(pfb->zsbuf->format));
         continue;
      default:
         continue;
      }

      patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
      patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
                     A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
      patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
                     A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
   }
   util_dynarray_clear(&batch->gmem_patches);

   /* set to zero, for some reason hardware doesn't like certain values */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
   OUT_RING(ring, 0);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
   OUT_RING(ring, 0);
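
   /* hw binning path: patch the binning shaders and recorded draws, set up
    * one memexport buffer per VSC pipe plus the constants describing each
    * pipe's screen region, then run the binning IB before rendering tiles.
    */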
   if (use_hw_binning(batch)) {
      /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
       *
       * in the shader compiler, we guarantee that the shader ends with
       * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
       *
       * since the patches point only to dwords and CFs are 1.5 dwords,
       * the patch is aligned and might point to an ALLOC CF
       */
      for (int i = 0; i < batch->shader_patches.size / sizeof(void *); i++) {
         instr_cf_t *cf =
            *util_dynarray_element(&batch->shader_patches, instr_cf_t *, i);
         if (cf->opc == ALLOC)
            cf++;
         assert(cf->opc == EXEC);
         assert(cf[ctx->screen->info->num_vsc_pipes * 2 - 2].opc == EXEC_END);
         cf[2 * (gmem->num_vsc_pipes - 1)].opc = EXEC_END;
      }

      patch_draws(batch, USE_VISIBILITY);

      /* initialize shader constants for the binning memexport */
      OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
      OUT_RING(ring, 0x0000000C);

      for (int i = 0; i < gmem->num_vsc_pipes; i++) {
         /* allocate in 64k increments to avoid reallocs */
         uint32_t bo_size = align(batch->num_vertices, 0x10000);
         if (!ctx->vsc_pipe_bo[i] ||
             fd_bo_size(ctx->vsc_pipe_bo[i]) < bo_size) {
            if (ctx->vsc_pipe_bo[i])
               fd_bo_del(ctx->vsc_pipe_bo[i]);
            ctx->vsc_pipe_bo[i] =
               fd_bo_new(ctx->dev, bo_size, 0, "vsc_pipe[%u]", i);
            assert(ctx->vsc_pipe_bo[i]);
         }

         /* memory export address (export32):
          * .x: (base_address >> 2) | 0x40000000 (?)
          * .y: index (float) - set by shader
          * .z: 0x4B00D000 (?)
          * .w: 0x4B000000 (?) | max_index (?)
          */
         OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0x40000000, -2);
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, 0x4B00D000);
         OUT_RING(ring, 0x4B000000 | bo_size);
      }

      OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
      OUT_RING(ring, 0x0000018C);

      for (int i = 0; i < gmem->num_vsc_pipes; i++) {
         const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
         float off_x, off_y, mul_x, mul_y;

         /* const to transform from [-1,1] to bin coordinates for this pipe
          * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
          * 8 possible values on x/y axis,
          * to clip at binning stage: only use center 6x6
          * TODO: set the z parameters too so that hw binning
          * can clip primitives in Z too
          */

         mul_x = 1.0f / (float)(gmem->bin_w * 8);
         mul_y = 1.0f / (float)(gmem->bin_h * 8);
         off_x = -pipe->x * (1.0f / 8.0f) + 0.125f - mul_x * gmem->minx;
         off_y = -pipe->y * (1.0f / 8.0f) + 0.125f - mul_y * gmem->miny;

         OUT_RING(ring, fui(off_x * (256.0f / 255.0f)));
         OUT_RING(ring, fui(off_y * (256.0f / 255.0f)));
         OUT_RING(ring, 0x3f000000);
         OUT_RING(ring, fui(0.0f));

         OUT_RING(ring, fui(mul_x * (256.0f / 255.0f)));
         OUT_RING(ring, fui(mul_y * (256.0f / 255.0f)));
         OUT_RING(ring, fui(0.0f));
         OUT_RING(ring, fui(0.0f));
      }

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
      OUT_RING(ring, 0);

      fd2_emit_ib(ring, batch->binning);

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
      OUT_RING(ring, 0x00000002);
   } else {
      patch_draws(batch, IGNORE_VISIBILITY);
   }

   util_dynarray_clear(&batch->draw_patches);
   util_dynarray_clear(&batch->shader_patches);
}

/* before mem2gmem */
static void
fd2_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
{
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

   /* setup screen scissor for current tile (same for mem2gmem): */
   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
                     A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
}

/* before IB to rendering cmds: */
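/* fd2_emit_tile_renderprep() programs the per-tile window offset and copy
 * destination offset, stashes the tile's SCISSOR_BR value for the fast clear
 * path, and when hw binning is used selects this tile's bin id and
 * visibility data.
 */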
static void
fd2_emit_tile_renderprep(struct fd_batch *batch,
                         const struct fd_tile *tile) assert_dt
{
   struct fd_context *ctx = batch->ctx;
   struct fd2_context *fd2_ctx = fd2_context(ctx);
   struct fd_ringbuffer *ring = batch->gmem;
   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);

   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
   OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));

   /* setup window scissor and offset for current tile (different
    * from mem2gmem):
    */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
   OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
                     A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));

   /* write SCISSOR_BR to memory so fast clear path can restore from it */
   OUT_PKT3(ring, CP_MEM_WRITE, 2);
   OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));

   /* set the copy offset for gmem2mem */
   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
   OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
   OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
                     A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));

   /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
   if (is_a20x(ctx->screen)) {
      OUT_PKT3(ring, CP_SET_CONSTANT, 5);
      OUT_RING(ring, 0x00000580);
      OUT_RING(ring, fui(tile->xoff));
      OUT_RING(ring, fui(tile->yoff));
      OUT_RING(ring, fui(0.0f));
      OUT_RING(ring, fui(0.0f));
   }

   if (use_hw_binning(batch)) {
      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
      OUT_RING(ring, tile->n);

      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
      OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
      OUT_RING(ring, tile->n);

      /* TODO only emit this when tile->p changes */
      OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
      OUT_RELOC(ring, pipe_bo, 0, 0, 0);
   }
}
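
/* fd2_gmem_init() hooks the a2xx tiling/GMEM callbacks into the context */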
void
fd2_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
{
   struct fd_context *ctx = fd_context(pctx);

   ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
   ctx->emit_tile_init = fd2_emit_tile_init;
   ctx->emit_tile_prep = fd2_emit_tile_prep;
   ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
   ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
   ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
}