1/* 2 * Copyright (C) 2012-2013 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#include "pipe/p_state.h" 28#include "util/u_memory.h" 29#include "util/u_prim.h" 30#include "util/u_string.h" 31 32#include "freedreno_resource.h" 33#include "freedreno_state.h" 34 35#include "fd2_context.h" 36#include "fd2_draw.h" 37#include "fd2_emit.h" 38#include "fd2_program.h" 39#include "fd2_util.h" 40#include "fd2_zsa.h" 41 42static void 43emit_cacheflush(struct fd_ringbuffer *ring) 44{ 45 unsigned i; 46 47 for (i = 0; i < 12; i++) { 48 OUT_PKT3(ring, CP_EVENT_WRITE, 1); 49 OUT_RING(ring, CACHE_FLUSH); 50 } 51} 52 53static void 54emit_vertexbufs(struct fd_context *ctx) assert_dt 55{ 56 struct fd_vertex_stateobj *vtx = ctx->vtx.vtx; 57 struct fd_vertexbuf_stateobj *vertexbuf = &ctx->vtx.vertexbuf; 58 struct fd2_vertex_buf bufs[PIPE_MAX_ATTRIBS]; 59 unsigned i; 60 61 if (!vtx->num_elements) 62 return; 63 64 for (i = 0; i < vtx->num_elements; i++) { 65 struct pipe_vertex_element *elem = &vtx->pipe[i]; 66 struct pipe_vertex_buffer *vb = &vertexbuf->vb[elem->vertex_buffer_index]; 67 bufs[i].offset = vb->buffer_offset; 68 bufs[i].size = fd_bo_size(fd_resource(vb->buffer.resource)->bo); 69 bufs[i].prsc = vb->buffer.resource; 70 } 71 72 // NOTE I believe the 0x78 (or 0x9c in solid_vp) relates to the 73 // CONST(20,0) (or CONST(26,0) in soliv_vp) 74 75 fd2_emit_vertex_bufs(ctx->batch->draw, 0x78, bufs, vtx->num_elements); 76 fd2_emit_vertex_bufs(ctx->batch->binning, 0x78, bufs, vtx->num_elements); 77} 78 79static void 80draw_impl(struct fd_context *ctx, const struct pipe_draw_info *info, 81 const struct pipe_draw_start_count_bias *draw, struct fd_ringbuffer *ring, 82 unsigned index_offset, bool binning) assert_dt 83{ 84 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 85 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); 86 OUT_RING(ring, info->index_size ? 0 : draw->start); 87 88 OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1); 89 OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE); 90 91 if (is_a20x(ctx->screen)) { 92 /* wait for DMA to finish and 93 * dummy draw one triangle with indexes 0,0,0. 94 * with PRE_FETCH_CULL_ENABLE | GRP_CULL_ENABLE. 95 * 96 * this workaround is for a HW bug related to DMA alignment: 97 * it is necessary for indexed draws and possibly also 98 * draws that read binning data 99 */ 100 OUT_PKT3(ring, CP_WAIT_REG_EQ, 4); 101 OUT_RING(ring, 0x000005d0); /* RBBM_STATUS */ 102 OUT_RING(ring, 0x00000000); 103 OUT_RING(ring, 0x00001000); /* bit: 12: VGT_BUSY_NO_DMA */ 104 OUT_RING(ring, 0x00000001); 105 106 OUT_PKT3(ring, CP_DRAW_INDX_BIN, 6); 107 OUT_RING(ring, 0x00000000); 108 OUT_RING(ring, 0x0003c004); 109 OUT_RING(ring, 0x00000000); 110 OUT_RING(ring, 0x00000003); 111 OUT_RELOC(ring, fd_resource(fd2_context(ctx)->solid_vertexbuf)->bo, 64, 0, 112 0); 113 OUT_RING(ring, 0x00000006); 114 } else { 115 OUT_WFI(ring); 116 117 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 118 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX)); 119 OUT_RING(ring, info->index_bounds_valid ? info->max_index 120 : ~0); /* VGT_MAX_VTX_INDX */ 121 OUT_RING(ring, info->index_bounds_valid ? info->min_index 122 : 0); /* VGT_MIN_VTX_INDX */ 123 } 124 125 /* binning shader will take offset from C64 */ 126 if (binning && is_a20x(ctx->screen)) { 127 OUT_PKT3(ring, CP_SET_CONSTANT, 5); 128 OUT_RING(ring, 0x00000180); 129 OUT_RING(ring, fui(ctx->batch->num_vertices)); 130 OUT_RING(ring, fui(0.0f)); 131 OUT_RING(ring, fui(0.0f)); 132 OUT_RING(ring, fui(0.0f)); 133 } 134 135 enum pc_di_vis_cull_mode vismode = USE_VISIBILITY; 136 if (binning || info->mode == PIPE_PRIM_POINTS) 137 vismode = IGNORE_VISIBILITY; 138 139 fd_draw_emit(ctx->batch, ring, ctx->screen->primtypes[info->mode], 140 vismode, info, draw, index_offset); 141 142 if (is_a20x(ctx->screen)) { 143 /* not sure why this is required, but it fixes some hangs */ 144 OUT_WFI(ring); 145 } else { 146 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 147 OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010)); 148 OUT_RING(ring, 0x00000000); 149 } 150 151 emit_cacheflush(ring); 152} 153 154static bool 155fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *pinfo, 156 unsigned drawid_offset, 157 const struct pipe_draw_indirect_info *indirect, 158 const struct pipe_draw_start_count_bias *pdraw, 159 unsigned index_offset) assert_dt 160{ 161 if (!ctx->prog.fs || !ctx->prog.vs) 162 return false; 163 164 if (pinfo->mode != PIPE_PRIM_MAX && !indirect && !pinfo->primitive_restart && 165 !u_trim_pipe_prim(pinfo->mode, (unsigned *)&pdraw->count)) 166 return false; 167 168 if (ctx->dirty & FD_DIRTY_VTXBUF) 169 emit_vertexbufs(ctx); 170 171 if (fd_binning_enabled) 172 fd2_emit_state_binning(ctx, ctx->dirty); 173 174 fd2_emit_state(ctx, ctx->dirty); 175 176 /* a2xx can draw only 65535 vertices at once 177 * on a22x the field in the draw command is 32bits but seems limited too 178 * using a limit of 32k because it fixes an unexplained hang 179 * 32766 works for all primitives (multiple of 2 and 3) 180 */ 181 if (pdraw->count > 32766) { 182 /* clang-format off */ 183 static const uint16_t step_tbl[PIPE_PRIM_MAX] = { 184 [0 ... PIPE_PRIM_MAX - 1] = 32766, 185 [PIPE_PRIM_LINE_STRIP] = 32765, 186 [PIPE_PRIM_TRIANGLE_STRIP] = 32764, 187 188 /* needs more work */ 189 [PIPE_PRIM_TRIANGLE_FAN] = 0, 190 [PIPE_PRIM_LINE_LOOP] = 0, 191 }; 192 /* clang-format on */ 193 194 struct pipe_draw_start_count_bias draw = *pdraw; 195 unsigned count = draw.count; 196 unsigned step = step_tbl[pinfo->mode]; 197 unsigned num_vertices = ctx->batch->num_vertices; 198 199 if (!step) 200 return false; 201 202 for (; count + step > 32766; count -= step) { 203 draw.count = MIN2(count, 32766); 204 draw_impl(ctx, pinfo, &draw, ctx->batch->draw, index_offset, false); 205 draw_impl(ctx, pinfo, &draw, ctx->batch->binning, index_offset, true); 206 draw.start += step; 207 ctx->batch->num_vertices += step; 208 } 209 /* changing this value is a hack, restore it */ 210 ctx->batch->num_vertices = num_vertices; 211 } else { 212 draw_impl(ctx, pinfo, pdraw, ctx->batch->draw, index_offset, false); 213 draw_impl(ctx, pinfo, pdraw, ctx->batch->binning, index_offset, true); 214 } 215 216 fd_context_all_clean(ctx); 217 218 return true; 219} 220 221static void 222clear_state(struct fd_batch *batch, struct fd_ringbuffer *ring, 223 unsigned buffers, bool fast_clear) assert_dt 224{ 225 struct fd_context *ctx = batch->ctx; 226 struct fd2_context *fd2_ctx = fd2_context(ctx); 227 uint32_t reg; 228 229 fd2_emit_vertex_bufs(ring, 0x9c, 230 (struct fd2_vertex_buf[]){ 231 {.prsc = fd2_ctx->solid_vertexbuf, .size = 36}, 232 }, 233 1); 234 235 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 236 OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET)); 237 OUT_RING(ring, 0); 238 239 fd2_program_emit(ctx, ring, &ctx->solid_prog); 240 241 OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1); 242 OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE); 243 244 if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { 245 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 246 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL)); 247 reg = 0; 248 if (buffers & PIPE_CLEAR_DEPTH) { 249 reg |= A2XX_RB_DEPTHCONTROL_ZFUNC(FUNC_ALWAYS) | 250 A2XX_RB_DEPTHCONTROL_Z_ENABLE | 251 A2XX_RB_DEPTHCONTROL_Z_WRITE_ENABLE | 252 A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE; 253 } 254 if (buffers & PIPE_CLEAR_STENCIL) { 255 reg |= A2XX_RB_DEPTHCONTROL_STENCILFUNC(FUNC_ALWAYS) | 256 A2XX_RB_DEPTHCONTROL_STENCIL_ENABLE | 257 A2XX_RB_DEPTHCONTROL_STENCILZPASS(STENCIL_REPLACE); 258 } 259 OUT_RING(ring, reg); 260 } 261 262 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 263 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL)); 264 OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) | 265 A2XX_RB_COLORCONTROL_BLEND_DISABLE | 266 A2XX_RB_COLORCONTROL_ROP_CODE(12) | 267 A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) | 268 A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL)); 269 270 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 271 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL)); 272 OUT_RING(ring, 0x00000000); /* PA_CL_CLIP_CNTL */ 273 OUT_RING( 274 ring, 275 A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */ 276 A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) | 277 A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES) | 278 (fast_clear ? A2XX_PA_SU_SC_MODE_CNTL_MSAA_ENABLE : 0)); 279 280 if (fast_clear) { 281 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 282 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG)); 283 OUT_RING(ring, A2XX_PA_SC_AA_CONFIG_MSAA_NUM_SAMPLES(3)); 284 } 285 286 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 287 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK)); 288 OUT_RING(ring, 0x0000ffff); 289 290 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 291 OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_MASK)); 292 if (buffers & PIPE_CLEAR_COLOR) { 293 OUT_RING(ring, A2XX_RB_COLOR_MASK_WRITE_RED | 294 A2XX_RB_COLOR_MASK_WRITE_GREEN | 295 A2XX_RB_COLOR_MASK_WRITE_BLUE | 296 A2XX_RB_COLOR_MASK_WRITE_ALPHA); 297 } else { 298 OUT_RING(ring, 0x0); 299 } 300 301 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 302 OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL)); 303 OUT_RING(ring, 0); 304 305 if (is_a20x(batch->ctx->screen)) 306 return; 307 308 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 309 OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX)); 310 OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */ 311 OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */ 312 313 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 314 OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); 315 OUT_RING(ring, 316 0xff000000 | A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); 317 OUT_RING(ring, 0xff000000 | A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); 318 319 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 320 OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL)); 321 OUT_RING(ring, 0x00000084); 322 323 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 324 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); 325 OUT_RING(ring, 0x0000028f); 326} 327 328static void 329clear_state_restore(struct fd_context *ctx, struct fd_ringbuffer *ring) 330{ 331 if (is_a20x(ctx->screen)) 332 return; 333 334 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 335 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); 336 OUT_RING(ring, 0x00000000); 337 338 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 339 OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL)); 340 OUT_RING(ring, 0x00000000); 341 342 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 343 OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL)); 344 OUT_RING(ring, 0x0000003b); 345} 346 347static void 348clear_fast(struct fd_batch *batch, struct fd_ringbuffer *ring, 349 uint32_t color_clear, uint32_t depth_clear, unsigned patch_type) 350{ 351 BEGIN_RING(ring, 8); /* preallocate next 2 packets (for patching) */ 352 353 /* zero values are patched in */ 354 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 355 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR)); 356 OUT_RINGP(ring, patch_type, &batch->gmem_patches); 357 358 OUT_PKT3(ring, CP_SET_CONSTANT, 4); 359 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO)); 360 OUT_RING(ring, 0x8000 | 32); 361 OUT_RING(ring, 0); 362 OUT_RING(ring, 0); 363 364 /* set fill values */ 365 if (!is_a20x(batch->ctx->screen)) { 366 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 367 OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR)); 368 OUT_RING(ring, color_clear); 369 370 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 371 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); 372 OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE | 373 A2XX_RB_COPY_CONTROL_CLEAR_MASK(0xf)); 374 375 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 376 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR)); 377 OUT_RING(ring, depth_clear); 378 } else { 379 const float sc = 1.0f / 255.0f; 380 381 OUT_PKT3(ring, CP_SET_CONSTANT, 5); 382 OUT_RING(ring, 0x00000480); 383 OUT_RING(ring, fui((float)(color_clear >> 0 & 0xff) * sc)); 384 OUT_RING(ring, fui((float)(color_clear >> 8 & 0xff) * sc)); 385 OUT_RING(ring, fui((float)(color_clear >> 16 & 0xff) * sc)); 386 OUT_RING(ring, fui((float)(color_clear >> 24 & 0xff) * sc)); 387 388 // XXX if using float the rounding error breaks it.. 389 float depth = ((double)(depth_clear >> 8)) * (1.0 / (double)0xffffff); 390 assert((unsigned)(((double)depth * (double)0xffffff)) == 391 (depth_clear >> 8)); 392 393 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 394 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE)); 395 OUT_RING(ring, fui(0.0f)); 396 OUT_RING(ring, fui(depth)); 397 398 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 399 OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); 400 OUT_RING(ring, 401 0xff000000 | 402 A2XX_RB_STENCILREFMASK_BF_STENCILREF(depth_clear & 0xff) | 403 A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); 404 OUT_RING(ring, 0xff000000 | 405 A2XX_RB_STENCILREFMASK_STENCILREF(depth_clear & 0xff) | 406 A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); 407 } 408 409 fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, 410 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); 411} 412 413static bool 414fd2_clear_fast(struct fd_context *ctx, unsigned buffers, 415 const union pipe_color_union *color, double depth, 416 unsigned stencil) assert_dt 417{ 418 /* using 4x MSAA allows clearing ~2x faster 419 * then we can use higher bpp clearing to clear lower bpp 420 * 1 "pixel" can clear 64 bits (rgba8+depth24+stencil8) 421 * note: its possible to clear with 32_32_32_32 format but its not faster 422 * note: fast clear doesn't work with sysmem rendering 423 * (sysmem rendering is disabled when clear is used) 424 * 425 * we only have 16-bit / 32-bit color formats 426 * and 16-bit / 32-bit depth formats 427 * so there are only a few possible combinations 428 * 429 * if the bpp of the color/depth doesn't match 430 * we clear with depth/color individually 431 */ 432 struct fd2_context *fd2_ctx = fd2_context(ctx); 433 struct fd_batch *batch = ctx->batch; 434 struct fd_ringbuffer *ring = batch->draw; 435 struct pipe_framebuffer_state *pfb = &batch->framebuffer; 436 uint32_t color_clear = 0, depth_clear = 0; 437 enum pipe_format format = pipe_surface_format(pfb->cbufs[0]); 438 int depth_size = -1; /* -1: no clear, 0: clear 16-bit, 1: clear 32-bit */ 439 int color_size = -1; 440 441 /* TODO: need to test performance on a22x */ 442 if (!is_a20x(ctx->screen)) 443 return false; 444 445 if (buffers & PIPE_CLEAR_COLOR) 446 color_size = util_format_get_blocksizebits(format) == 32; 447 448 if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { 449 /* no fast clear when clearing only one component of depth+stencil buffer */ 450 if (!(buffers & PIPE_CLEAR_DEPTH)) 451 return false; 452 453 if ((pfb->zsbuf->format == PIPE_FORMAT_Z24_UNORM_S8_UINT || 454 pfb->zsbuf->format == PIPE_FORMAT_S8_UINT_Z24_UNORM) && 455 !(buffers & PIPE_CLEAR_STENCIL)) 456 return false; 457 458 depth_size = fd_pipe2depth(pfb->zsbuf->format) == DEPTHX_24_8; 459 } 460 461 assert(color_size >= 0 || depth_size >= 0); 462 463 if (color_size == 0) { 464 color_clear = pack_rgba(format, color->f); 465 color_clear = (color_clear << 16) | (color_clear & 0xffff); 466 } else if (color_size == 1) { 467 color_clear = pack_rgba(format, color->f); 468 } 469 470 if (depth_size == 0) { 471 depth_clear = (uint32_t)(0xffff * depth); 472 depth_clear |= depth_clear << 16; 473 } else if (depth_size == 1) { 474 depth_clear = (((uint32_t)(0xffffff * depth)) << 8); 475 depth_clear |= (stencil & 0xff); 476 } 477 478 /* disable "window" scissor.. */ 479 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 480 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); 481 OUT_RING(ring, xy2d(0, 0)); 482 OUT_RING(ring, xy2d(0x7fff, 0x7fff)); 483 484 /* make sure we fill all "pixels" (in SCREEN_SCISSOR) */ 485 OUT_PKT3(ring, CP_SET_CONSTANT, 5); 486 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); 487 OUT_RING(ring, fui(4096.0f)); 488 OUT_RING(ring, fui(4096.0f)); 489 OUT_RING(ring, fui(4096.0f)); 490 OUT_RING(ring, fui(4096.0f)); 491 492 clear_state(batch, ring, ~0u, true); 493 494 if (color_size >= 0 && depth_size != color_size) 495 clear_fast(batch, ring, color_clear, color_clear, 496 GMEM_PATCH_FASTCLEAR_COLOR); 497 498 if (depth_size >= 0 && depth_size != color_size) 499 clear_fast(batch, ring, depth_clear, depth_clear, 500 GMEM_PATCH_FASTCLEAR_DEPTH); 501 502 if (depth_size == color_size) 503 clear_fast(batch, ring, color_clear, depth_clear, 504 GMEM_PATCH_FASTCLEAR_COLOR_DEPTH); 505 506 clear_state_restore(ctx, ring); 507 508 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 509 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_CONFIG)); 510 OUT_RING(ring, 0); 511 512 /* can't patch in SCREEN_SCISSOR_BR as it can be different for each tile. 513 * MEM_WRITE the value in tile_renderprep, and use CP_LOAD_CONSTANT_CONTEXT 514 * the value is read from byte offset 60 in the given bo 515 */ 516 OUT_PKT3(ring, CP_LOAD_CONSTANT_CONTEXT, 3); 517 OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 0, 0, 0); 518 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_BR)); 519 OUT_RING(ring, 1); 520 521 OUT_PKT3(ring, CP_SET_CONSTANT, 4); 522 OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO)); 523 OUT_RINGP(ring, GMEM_PATCH_RESTORE_INFO, &batch->gmem_patches); 524 OUT_RING(ring, 0); 525 OUT_RING(ring, 0); 526 return true; 527} 528 529static bool 530fd2_clear(struct fd_context *ctx, unsigned buffers, 531 const union pipe_color_union *color, double depth, 532 unsigned stencil) assert_dt 533{ 534 struct fd_ringbuffer *ring = ctx->batch->draw; 535 struct pipe_framebuffer_state *fb = &ctx->batch->framebuffer; 536 537 if (fd2_clear_fast(ctx, buffers, color, depth, stencil)) 538 goto dirty; 539 540 /* set clear value */ 541 if (is_a20x(ctx->screen)) { 542 if (buffers & PIPE_CLEAR_COLOR) { 543 /* C0 used by fragment shader */ 544 OUT_PKT3(ring, CP_SET_CONSTANT, 5); 545 OUT_RING(ring, 0x00000480); 546 OUT_RING(ring, color->ui[0]); 547 OUT_RING(ring, color->ui[1]); 548 OUT_RING(ring, color->ui[2]); 549 OUT_RING(ring, color->ui[3]); 550 } 551 552 if (buffers & PIPE_CLEAR_DEPTH) { 553 /* use viewport to set depth value */ 554 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 555 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_ZSCALE)); 556 OUT_RING(ring, fui(0.0f)); 557 OUT_RING(ring, fui(depth)); 558 } 559 560 if (buffers & PIPE_CLEAR_STENCIL) { 561 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 562 OUT_RING(ring, CP_REG(REG_A2XX_RB_STENCILREFMASK_BF)); 563 OUT_RING(ring, 0xff000000 | 564 A2XX_RB_STENCILREFMASK_BF_STENCILREF(stencil) | 565 A2XX_RB_STENCILREFMASK_BF_STENCILWRITEMASK(0xff)); 566 OUT_RING(ring, 0xff000000 | 567 A2XX_RB_STENCILREFMASK_STENCILREF(stencil) | 568 A2XX_RB_STENCILREFMASK_STENCILWRITEMASK(0xff)); 569 } 570 } else { 571 if (buffers & PIPE_CLEAR_COLOR) { 572 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 573 OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR)); 574 OUT_RING(ring, pack_rgba(PIPE_FORMAT_R8G8B8A8_UNORM, color->f)); 575 } 576 577 if (buffers & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) { 578 uint32_t clear_mask, depth_clear; 579 switch (fd_pipe2depth(fb->zsbuf->format)) { 580 case DEPTHX_24_8: 581 clear_mask = ((buffers & PIPE_CLEAR_DEPTH) ? 0xe : 0) | 582 ((buffers & PIPE_CLEAR_STENCIL) ? 0x1 : 0); 583 depth_clear = 584 (((uint32_t)(0xffffff * depth)) << 8) | (stencil & 0xff); 585 break; 586 case DEPTHX_16: 587 clear_mask = 0xf; 588 depth_clear = (uint32_t)(0xffffffff * depth); 589 break; 590 default: 591 unreachable("invalid depth"); 592 break; 593 } 594 595 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 596 OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL)); 597 OUT_RING(ring, A2XX_RB_COPY_CONTROL_DEPTH_CLEAR_ENABLE | 598 A2XX_RB_COPY_CONTROL_CLEAR_MASK(clear_mask)); 599 600 OUT_PKT3(ring, CP_SET_CONSTANT, 2); 601 OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTH_CLEAR)); 602 OUT_RING(ring, depth_clear); 603 } 604 } 605 606 /* scissor state */ 607 OUT_PKT3(ring, CP_SET_CONSTANT, 3); 608 OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL)); 609 OUT_RING(ring, xy2d(0, 0)); 610 OUT_RING(ring, xy2d(fb->width, fb->height)); 611 612 /* viewport state */ 613 OUT_PKT3(ring, CP_SET_CONSTANT, 5); 614 OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE)); 615 OUT_RING(ring, fui((float)fb->width / 2.0f)); 616 OUT_RING(ring, fui((float)fb->width / 2.0f)); 617 OUT_RING(ring, fui((float)fb->height / 2.0f)); 618 OUT_RING(ring, fui((float)fb->height / 2.0f)); 619 620 /* common state */ 621 clear_state(ctx->batch, ring, buffers, false); 622 623 fd_draw(ctx->batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY, 624 DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL); 625 626 clear_state_restore(ctx, ring); 627 628dirty: 629 ctx->dirty |= FD_DIRTY_ZSA | FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | 630 FD_DIRTY_SAMPLE_MASK | FD_DIRTY_PROG | FD_DIRTY_CONST | 631 FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_SCISSOR; 632 633 ctx->dirty_shader[PIPE_SHADER_VERTEX] |= FD_DIRTY_SHADER_PROG; 634 ctx->dirty_shader[PIPE_SHADER_FRAGMENT] |= 635 FD_DIRTY_SHADER_PROG | FD_DIRTY_SHADER_CONST; 636 637 return true; 638} 639 640void 641fd2_draw_init(struct pipe_context *pctx) disable_thread_safety_analysis 642{ 643 struct fd_context *ctx = fd_context(pctx); 644 ctx->draw_vbo = fd2_draw_vbo; 645 ctx->clear = fd2_clear; 646} 647