/*
 * Copyright (C) 2013 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "pipe/p_state.h"
#include "util/format/u_format.h"
#include "util/u_helpers.h"
#include "util/u_memory.h"
#include "util/u_string.h"
#include "util/u_viewport.h"

#include "freedreno_query_hw.h"
#include "freedreno_resource.h"

#include "fd3_blend.h"
#include "fd3_context.h"
#include "fd3_emit.h"
#include "fd3_format.h"
#include "fd3_program.h"
#include "fd3_rasterizer.h"
#include "fd3_texture.h"
#include "fd3_zsa.h"

#define emit_const_user fd3_emit_const_user
#define emit_const_bo   fd3_emit_const_bo
#include "ir3_const.h"

static const enum adreno_state_block sb[] = {
   [MESA_SHADER_VERTEX] = SB_VERT_SHADER,
   [MESA_SHADER_FRAGMENT] = SB_FRAG_SHADER,
};

/* regid:          base const register
 * prsc or dwords: buffer containing constant values
 * sizedwords:     size of const value buffer
 */
static void
fd3_emit_const_user(struct fd_ringbuffer *ring,
                    const struct ir3_shader_variant *v, uint32_t regid,
                    uint32_t sizedwords, const uint32_t *dwords)
{
   emit_const_asserts(ring, v, regid, sizedwords);

   OUT_PKT3(ring, CP_LOAD_STATE, 2 + sizedwords);
   OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid / 2) |
                     CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                     CP_LOAD_STATE_0_STATE_BLOCK(sb[v->type]) |
                     CP_LOAD_STATE_0_NUM_UNIT(sizedwords / 2));
   OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
                     CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));
   for (int i = 0; i < sizedwords; i++)
      OUT_RING(ring, dwords[i]);
}

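/* Variant of the above where the const data lives in a BO instead of being
 * inlined in the cmdstream.  Note that DST_OFF/NUM_UNIT are in units of
 * 2 dwords, hence the divide-by-two; e.g. (just a sketch of the arithmetic)
 * regid=64/sizedwords=32 becomes dst_off=32/num_unit=16, which satisfies
 * the alignment asserts below.
 */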
static void
fd3_emit_const_bo(struct fd_ringbuffer *ring,
                  const struct ir3_shader_variant *v, uint32_t regid,
                  uint32_t offset, uint32_t sizedwords, struct fd_bo *bo)
{
   uint32_t dst_off = regid / 2;
   /* The blob driver aligns all const uploads dst_off to 64. We've been
    * successfully aligning to 8 vec4s as const_upload_unit so far with no
    * ill effects.
    */
   assert(dst_off % 16 == 0);
   uint32_t num_unit = sizedwords / 2;
   assert(num_unit % 2 == 0);

   emit_const_asserts(ring, v, regid, sizedwords);

   OUT_PKT3(ring, CP_LOAD_STATE, 2);
   OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(dst_off) |
                     CP_LOAD_STATE_0_STATE_SRC(SS_INDIRECT) |
                     CP_LOAD_STATE_0_STATE_BLOCK(sb[v->type]) |
                     CP_LOAD_STATE_0_NUM_UNIT(num_unit));
   OUT_RELOC(ring, bo, offset, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS), 0);
}

static void
fd3_emit_const_ptrs(struct fd_ringbuffer *ring, gl_shader_stage type,
                    uint32_t regid, uint32_t num, struct fd_bo **bos,
                    uint32_t *offsets)
{
   uint32_t anum = align(num, 4);
   uint32_t i;

   assert((regid % 4) == 0);

   OUT_PKT3(ring, CP_LOAD_STATE, 2 + anum);
   OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(regid / 2) |
                     CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                     CP_LOAD_STATE_0_STATE_BLOCK(sb[type]) |
                     CP_LOAD_STATE_0_NUM_UNIT(anum / 2));
   OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) |
                     CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS));

   for (i = 0; i < num; i++) {
      if (bos[i]) {
         OUT_RELOC(ring, bos[i], offsets[i], 0, 0);
      } else {
         OUT_RING(ring, 0xbad00000 | (i << 16));
      }
   }

   for (; i < anum; i++)
      OUT_RING(ring, 0xffffffff);
}

static bool
is_stateobj(struct fd_ringbuffer *ring)
{
   return false;
}

static void
emit_const_ptrs(struct fd_ringbuffer *ring, const struct ir3_shader_variant *v,
                uint32_t dst_offset, uint32_t num, struct fd_bo **bos,
                uint32_t *offsets)
{
   /* TODO inline this */
   assert(dst_offset + num <= v->constlen * 4);
   fd3_emit_const_ptrs(ring, v->type, dst_offset, num, bos, offsets);
}

#define VERT_TEX_OFF 0
#define FRAG_TEX_OFF 16
#define BASETABLE_SZ A3XX_MAX_MIP_LEVELS

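/* Emit sampler and texture state for a single shader stage, as three
 * CP_LOAD_STATE packets: sampler state (2 dwords per sampler), texture
 * const state (4 dwords per texture), and a table of per-miplevel base
 * addresses (BASETABLE_SZ entries per texture, referenced from
 * TEX_CONST_2_INDX).
 */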
static void
emit_textures(struct fd_context *ctx, struct fd_ringbuffer *ring,
              enum adreno_state_block sb, struct fd_texture_stateobj *tex)
{
   static const unsigned tex_off[] = {
      [SB_VERT_TEX] = VERT_TEX_OFF,
      [SB_FRAG_TEX] = FRAG_TEX_OFF,
   };
   static const enum adreno_state_block mipaddr[] = {
      [SB_VERT_TEX] = SB_VERT_MIPADDR,
      [SB_FRAG_TEX] = SB_FRAG_MIPADDR,
   };
   static const uint32_t bcolor_reg[] = {
      [SB_VERT_TEX] = REG_A3XX_TPL1_TP_VS_BORDER_COLOR_BASE_ADDR,
      [SB_FRAG_TEX] = REG_A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR,
   };
   struct fd3_context *fd3_ctx = fd3_context(ctx);
   bool needs_border = false;
   unsigned i, j;

   if (tex->num_samplers > 0) {
      /* output sampler state: */
      OUT_PKT3(ring, CP_LOAD_STATE, 2 + (2 * tex->num_samplers));
      OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
                        CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                        CP_LOAD_STATE_0_STATE_BLOCK(sb) |
                        CP_LOAD_STATE_0_NUM_UNIT(tex->num_samplers));
      OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
                        CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
      for (i = 0; i < tex->num_samplers; i++) {
         static const struct fd3_sampler_stateobj dummy_sampler = {};
         const struct fd3_sampler_stateobj *sampler =
            tex->samplers[i] ? fd3_sampler_stateobj(tex->samplers[i])
                             : &dummy_sampler;

         OUT_RING(ring, sampler->texsamp0);
         OUT_RING(ring, sampler->texsamp1);

         needs_border |= sampler->needs_border;
      }
   }

   if (tex->num_textures > 0) {
      /* emit texture state: */
      OUT_PKT3(ring, CP_LOAD_STATE, 2 + (4 * tex->num_textures));
      OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(tex_off[sb]) |
                        CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                        CP_LOAD_STATE_0_STATE_BLOCK(sb) |
                        CP_LOAD_STATE_0_NUM_UNIT(tex->num_textures));
      OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
                        CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
      for (i = 0; i < tex->num_textures; i++) {
         static const struct fd3_pipe_sampler_view dummy_view = {};
         const struct fd3_pipe_sampler_view *view =
            tex->textures[i] ? fd3_pipe_sampler_view(tex->textures[i])
                             : &dummy_view;
         OUT_RING(ring, view->texconst0);
         OUT_RING(ring, view->texconst1);
         OUT_RING(ring,
                  view->texconst2 | A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
         OUT_RING(ring, view->texconst3);
      }

      /* emit mipaddrs: */
      OUT_PKT3(ring, CP_LOAD_STATE, 2 + (BASETABLE_SZ * tex->num_textures));
      OUT_RING(ring,
               CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * tex_off[sb]) |
                  CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                  CP_LOAD_STATE_0_STATE_BLOCK(mipaddr[sb]) |
                  CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * tex->num_textures));
      OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
                        CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
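      /* note: the dummy view uses first_level=1 / last_level=0, so the
       * per-miplevel loop below emits nothing and its entries just get
       * null-padded:
       */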
      for (i = 0; i < tex->num_textures; i++) {
         static const struct fd3_pipe_sampler_view dummy_view = {
            .base.target = PIPE_TEXTURE_1D, /* anything !PIPE_BUFFER */
            .base.u.tex.first_level = 1,
         };
         const struct fd3_pipe_sampler_view *view =
            tex->textures[i] ? fd3_pipe_sampler_view(tex->textures[i])
                             : &dummy_view;
         struct fd_resource *rsc = fd_resource(view->base.texture);
         if (rsc && rsc->b.b.target == PIPE_BUFFER) {
            OUT_RELOC(ring, rsc->bo, view->base.u.buf.offset, 0, 0);
            j = 1;
         } else {
            unsigned start = fd_sampler_first_level(&view->base);
            unsigned end = fd_sampler_last_level(&view->base);

            for (j = 0; j < (end - start + 1); j++) {
               struct fdl_slice *slice = fd_resource_slice(rsc, j + start);
               OUT_RELOC(ring, rsc->bo, slice->offset, 0, 0);
            }
         }

         /* pad the remaining entries w/ null: */
         for (; j < BASETABLE_SZ; j++) {
            OUT_RING(ring, 0x00000000);
         }
      }
   }

   if (needs_border) {
      unsigned off;
      void *ptr;

      u_upload_alloc(fd3_ctx->border_color_uploader, 0,
                     BORDER_COLOR_UPLOAD_SIZE, BORDER_COLOR_UPLOAD_SIZE, &off,
                     &fd3_ctx->border_color_buf, &ptr);

      fd_setup_border_colors(tex, ptr, tex_off[sb]);

      OUT_PKT0(ring, bcolor_reg[sb], 1);
      OUT_RELOC(ring, fd_resource(fd3_ctx->border_color_buf)->bo, off, 0, 0);

      u_upload_unmap(fd3_ctx->border_color_uploader);
   }
}

/* emit texture state for mem->gmem restore operation.. eventually it would
 * be good to get rid of this and use normal CSO/etc state for more of these
 * special cases, but for now the compiler is not sufficient..
 *
 * Also, for using normal state, not quite sure how to handle the special
 * case format (fd3_gmem_restore_format()) stuff for restoring depth/stencil.
 */
void
fd3_emit_gmem_restore_tex(struct fd_ringbuffer *ring,
                          struct pipe_surface **psurf, int bufs)
{
   int i, j;

   /* output sampler state: */
   OUT_PKT3(ring, CP_LOAD_STATE, 2 + 2 * bufs);
   OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
                     CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                     CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
                     CP_LOAD_STATE_0_NUM_UNIT(bufs));
   OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER) |
                     CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
   for (i = 0; i < bufs; i++) {
      OUT_RING(ring, A3XX_TEX_SAMP_0_XY_MAG(A3XX_TEX_NEAREST) |
                        A3XX_TEX_SAMP_0_XY_MIN(A3XX_TEX_NEAREST) |
                        A3XX_TEX_SAMP_0_WRAP_S(A3XX_TEX_CLAMP_TO_EDGE) |
                        A3XX_TEX_SAMP_0_WRAP_T(A3XX_TEX_CLAMP_TO_EDGE) |
                        A3XX_TEX_SAMP_0_WRAP_R(A3XX_TEX_REPEAT));
      OUT_RING(ring, 0x00000000);
   }

   /* emit texture state: */
   OUT_PKT3(ring, CP_LOAD_STATE, 2 + 4 * bufs);
   OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(FRAG_TEX_OFF) |
                     CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                     CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_TEX) |
                     CP_LOAD_STATE_0_NUM_UNIT(bufs));
   OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
                     CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
   for (i = 0; i < bufs; i++) {
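      /* unbound buffers get a dummy 2D texture, with swizzle forcing all
       * components to constant one:
       */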
      if (!psurf[i]) {
         OUT_RING(ring, A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
                           A3XX_TEX_CONST_0_SWIZ_X(A3XX_TEX_ONE) |
                           A3XX_TEX_CONST_0_SWIZ_Y(A3XX_TEX_ONE) |
                           A3XX_TEX_CONST_0_SWIZ_Z(A3XX_TEX_ONE) |
                           A3XX_TEX_CONST_0_SWIZ_W(A3XX_TEX_ONE));
         OUT_RING(ring, 0x00000000);
         OUT_RING(ring, A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
         OUT_RING(ring, 0x00000000);
         continue;
      }

      struct fd_resource *rsc = fd_resource(psurf[i]->texture);
      enum pipe_format format = fd_gmem_restore_format(psurf[i]->format);
      /* The restore blit_zs shader expects stencil in sampler 0, and depth
       * in sampler 1
       */
      if (rsc->stencil && i == 0) {
         rsc = rsc->stencil;
         format = fd_gmem_restore_format(rsc->b.b.format);
      }

      /* note: PIPE_BUFFER disallowed for surfaces */
      unsigned lvl = psurf[i]->u.tex.level;

      assert(psurf[i]->u.tex.first_layer == psurf[i]->u.tex.last_layer);

      OUT_RING(ring, A3XX_TEX_CONST_0_TILE_MODE(rsc->layout.tile_mode) |
                        A3XX_TEX_CONST_0_FMT(fd3_pipe2tex(format)) |
                        A3XX_TEX_CONST_0_TYPE(A3XX_TEX_2D) |
                        fd3_tex_swiz(format, PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                                     PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W));
      OUT_RING(ring, A3XX_TEX_CONST_1_WIDTH(psurf[i]->width) |
                        A3XX_TEX_CONST_1_HEIGHT(psurf[i]->height));
      OUT_RING(ring, A3XX_TEX_CONST_2_PITCH(fd_resource_pitch(rsc, lvl)) |
                        A3XX_TEX_CONST_2_INDX(BASETABLE_SZ * i));
      OUT_RING(ring, 0x00000000);
   }

   /* emit mipaddrs: */
   OUT_PKT3(ring, CP_LOAD_STATE, 2 + BASETABLE_SZ * bufs);
   OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(BASETABLE_SZ * FRAG_TEX_OFF) |
                     CP_LOAD_STATE_0_STATE_SRC(SS_DIRECT) |
                     CP_LOAD_STATE_0_STATE_BLOCK(SB_FRAG_MIPADDR) |
                     CP_LOAD_STATE_0_NUM_UNIT(BASETABLE_SZ * bufs));
   OUT_RING(ring, CP_LOAD_STATE_1_STATE_TYPE(ST_CONSTANTS) |
                     CP_LOAD_STATE_1_EXT_SRC_ADDR(0));
   for (i = 0; i < bufs; i++) {
      if (psurf[i]) {
         struct fd_resource *rsc = fd_resource(psurf[i]->texture);
         /* Matches above logic for blit_zs shader */
         if (rsc->stencil && i == 0)
            rsc = rsc->stencil;
         unsigned lvl = psurf[i]->u.tex.level;
         uint32_t offset =
            fd_resource_offset(rsc, lvl, psurf[i]->u.tex.first_layer);
         OUT_RELOC(ring, rsc->bo, offset, 0, 0);
      } else {
         OUT_RING(ring, 0x00000000);
      }

      /* pad the remaining entries w/ null: */
      for (j = 1; j < BASETABLE_SZ; j++) {
         OUT_RING(ring, 0x00000000);
      }
   }
}

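/* Program the VFD: one FETCH_INSTR/DECODE_INSTR pair per enabled vertex
 * attribute.  FETCH_INSTR tells the hw where/how to fetch from the vbo,
 * DECODE_INSTR how to unpack it and which shader regid receives it.
 */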
void
fd3_emit_vertex_bufs(struct fd_ringbuffer *ring, struct fd3_emit *emit)
{
   int32_t i, j, last = -1;
   uint32_t total_in = 0;
   const struct fd_vertex_state *vtx = emit->vtx;
   const struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
   unsigned vertex_regid = regid(63, 0);
   unsigned instance_regid = regid(63, 0);
   unsigned vtxcnt_regid = regid(63, 0);

   /* Note that sysvals come *after* normal inputs: */
   for (i = 0; i < vp->inputs_count; i++) {
      if (!vp->inputs[i].compmask)
         continue;
      if (vp->inputs[i].sysval) {
         switch (vp->inputs[i].slot) {
         case SYSTEM_VALUE_VERTEX_ID_ZERO_BASE:
            vertex_regid = vp->inputs[i].regid;
            break;
         case SYSTEM_VALUE_INSTANCE_ID:
            instance_regid = vp->inputs[i].regid;
            break;
         case SYSTEM_VALUE_VERTEX_CNT:
            vtxcnt_regid = vp->inputs[i].regid;
            break;
         default:
            unreachable("invalid system value");
            break;
         }
      } else if (i < vtx->vtx->num_elements) {
         last = i;
      }
   }

   for (i = 0, j = 0; i <= last; i++) {
      assert(!vp->inputs[i].sysval);
      if (vp->inputs[i].compmask) {
         struct pipe_vertex_element *elem = &vtx->vtx->pipe[i];
         const struct pipe_vertex_buffer *vb =
            &vtx->vertexbuf.vb[elem->vertex_buffer_index];
         struct fd_resource *rsc = fd_resource(vb->buffer.resource);
         enum pipe_format pfmt = elem->src_format;
         enum a3xx_vtx_fmt fmt = fd3_pipe2vtx(pfmt);
         bool switchnext = (i != last) || (vertex_regid != regid(63, 0)) ||
                           (instance_regid != regid(63, 0)) ||
                           (vtxcnt_regid != regid(63, 0));
         bool isint = util_format_is_pure_integer(pfmt);
         uint32_t off = vb->buffer_offset + elem->src_offset;
         uint32_t fs = util_format_get_blocksize(pfmt);

         assert(fmt != VFMT_NONE);

         OUT_PKT0(ring, REG_A3XX_VFD_FETCH(j), 2);
         OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(fs - 1) |
                           A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(vb->stride) |
                           COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
                           A3XX_VFD_FETCH_INSTR_0_INDEXCODE(j) |
                           COND(elem->instance_divisor,
                                A3XX_VFD_FETCH_INSTR_0_INSTANCED) |
                           A3XX_VFD_FETCH_INSTR_0_STEPRATE(
                              MAX2(1, elem->instance_divisor)));
         OUT_RELOC(ring, rsc->bo, off, 0, 0);

         OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(j), 1);
         OUT_RING(ring,
                  A3XX_VFD_DECODE_INSTR_CONSTFILL |
                     A3XX_VFD_DECODE_INSTR_WRITEMASK(vp->inputs[i].compmask) |
                     A3XX_VFD_DECODE_INSTR_FORMAT(fmt) |
                     A3XX_VFD_DECODE_INSTR_SWAP(fd3_pipe2swap(pfmt)) |
                     A3XX_VFD_DECODE_INSTR_REGID(vp->inputs[i].regid) |
                     A3XX_VFD_DECODE_INSTR_SHIFTCNT(fs) |
                     A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
                     COND(isint, A3XX_VFD_DECODE_INSTR_INT) |
                     COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));

         total_in += util_bitcount(vp->inputs[i].compmask);
         j++;
      }
   }

   /* hw doesn't like to be configured for zero vbo's, it seems: */
   if (last < 0) {
      /* just recycle the shader bo, we just need to point to *something*
       * valid:
       */
      struct fd_bo *dummy_vbo = vp->bo;
      bool switchnext = (vertex_regid != regid(63, 0)) ||
                        (instance_regid != regid(63, 0)) ||
                        (vtxcnt_regid != regid(63, 0));

      OUT_PKT0(ring, REG_A3XX_VFD_FETCH(0), 2);
      OUT_RING(ring, A3XX_VFD_FETCH_INSTR_0_FETCHSIZE(0) |
                        A3XX_VFD_FETCH_INSTR_0_BUFSTRIDE(0) |
                        COND(switchnext, A3XX_VFD_FETCH_INSTR_0_SWITCHNEXT) |
                        A3XX_VFD_FETCH_INSTR_0_INDEXCODE(0) |
                        A3XX_VFD_FETCH_INSTR_0_STEPRATE(1));
      OUT_RELOC(ring, dummy_vbo, 0, 0, 0);

      OUT_PKT0(ring, REG_A3XX_VFD_DECODE_INSTR(0), 1);
      OUT_RING(ring, A3XX_VFD_DECODE_INSTR_CONSTFILL |
                        A3XX_VFD_DECODE_INSTR_WRITEMASK(0x1) |
                        A3XX_VFD_DECODE_INSTR_FORMAT(VFMT_8_UNORM) |
                        A3XX_VFD_DECODE_INSTR_SWAP(XYZW) |
                        A3XX_VFD_DECODE_INSTR_REGID(regid(0, 0)) |
                        A3XX_VFD_DECODE_INSTR_SHIFTCNT(1) |
                        A3XX_VFD_DECODE_INSTR_LASTCOMPVALID |
                        COND(switchnext, A3XX_VFD_DECODE_INSTR_SWITCHNEXT));

      total_in = 1;
      j = 1;
   }

   OUT_PKT0(ring, REG_A3XX_VFD_CONTROL_0, 2);
   OUT_RING(ring, A3XX_VFD_CONTROL_0_TOTALATTRTOVS(total_in) |
                     A3XX_VFD_CONTROL_0_PACKETSIZE(2) |
                     A3XX_VFD_CONTROL_0_STRMDECINSTRCNT(j) |
                     A3XX_VFD_CONTROL_0_STRMFETCHINSTRCNT(j));
   OUT_RING(ring, A3XX_VFD_CONTROL_1_MAXSTORAGE(1) | // XXX
                     A3XX_VFD_CONTROL_1_REGID4VTX(vertex_regid) |
                     A3XX_VFD_CONTROL_1_REGID4INST(instance_regid));

   OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1);
   OUT_RING(ring,
            A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) |
               A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(vtxcnt_regid));
}

void
fd3_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
               struct fd3_emit *emit)
{
   const struct ir3_shader_variant *vp = fd3_emit_get_vp(emit);
   const struct ir3_shader_variant *fp = fd3_emit_get_fp(emit);
   const enum fd_dirty_3d_state dirty = emit->dirty;

   emit_marker(ring, 5);

   if (dirty & FD_DIRTY_SAMPLE_MASK) {
      OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 1);
      OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
                        A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
                        A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(ctx->sample_mask));
   }

   if ((dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG |
                 FD_DIRTY_BLEND_DUAL)) &&
       !emit->binning_pass) {
      uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_render_control |
                     fd3_blend_stateobj(ctx->blend)->rb_render_control;

      val |= COND(fp->frag_face, A3XX_RB_RENDER_CONTROL_FACENESS);
      val |= COND(fp->fragcoord_compmask != 0,
                  A3XX_RB_RENDER_CONTROL_COORD_MASK(fp->fragcoord_compmask));
      val |= COND(ctx->rasterizer->rasterizer_discard,
                  A3XX_RB_RENDER_CONTROL_DISABLE_COLOR_PIPE);

      /* I suppose if we needed to (which I don't *think* we need
       * to), we could emit this for binning pass too. But we
       * would need to keep a different patch-list for binning
       * vs render pass.
       */

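      /* emitted with OUT_RINGP, so the gmem code can later patch in the
       * bin-width (and related) bits:
       */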
      OUT_PKT0(ring, REG_A3XX_RB_RENDER_CONTROL, 1);
      OUT_RINGP(ring, val, &ctx->batch->rbrc_patches);
   }

   if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_STENCIL_REF)) {
      struct fd3_zsa_stateobj *zsa = fd3_zsa_stateobj(ctx->zsa);
      struct pipe_stencil_ref *sr = &ctx->stencil_ref;

      OUT_PKT0(ring, REG_A3XX_RB_ALPHA_REF, 1);
      OUT_RING(ring, zsa->rb_alpha_ref);

      OUT_PKT0(ring, REG_A3XX_RB_STENCIL_CONTROL, 1);
      OUT_RING(ring, zsa->rb_stencil_control);

      OUT_PKT0(ring, REG_A3XX_RB_STENCILREFMASK, 2);
      OUT_RING(ring, zsa->rb_stencilrefmask |
                        A3XX_RB_STENCILREFMASK_STENCILREF(sr->ref_value[0]));
      OUT_RING(ring, zsa->rb_stencilrefmask_bf |
                        A3XX_RB_STENCILREFMASK_BF_STENCILREF(sr->ref_value[1]));
   }

   if (dirty & (FD_DIRTY_ZSA | FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
      uint32_t val = fd3_zsa_stateobj(ctx->zsa)->rb_depth_control;
      if (fp->writes_pos) {
         val |= A3XX_RB_DEPTH_CONTROL_FRAG_WRITES_Z;
         val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
      }
      if (fp->no_earlyz || fp->has_kill) {
         val |= A3XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE;
      }
      if (!ctx->rasterizer->depth_clip_near) {
         val |= A3XX_RB_DEPTH_CONTROL_Z_CLAMP_ENABLE;
      }
      OUT_PKT0(ring, REG_A3XX_RB_DEPTH_CONTROL, 1);
      OUT_RING(ring, val);
   }

   if (dirty & FD_DIRTY_RASTERIZER) {
      struct fd3_rasterizer_stateobj *rasterizer =
         fd3_rasterizer_stateobj(ctx->rasterizer);

      OUT_PKT0(ring, REG_A3XX_GRAS_SU_MODE_CONTROL, 1);
      OUT_RING(ring, rasterizer->gras_su_mode_control);

      OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
      OUT_RING(ring, rasterizer->gras_su_point_minmax);
      OUT_RING(ring, rasterizer->gras_su_point_size);

      OUT_PKT0(ring, REG_A3XX_GRAS_SU_POLY_OFFSET_SCALE, 2);
      OUT_RING(ring, rasterizer->gras_su_poly_offset_scale);
      OUT_RING(ring, rasterizer->gras_su_poly_offset_offset);
   }

   if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG)) {
      uint32_t val =
         fd3_rasterizer_stateobj(ctx->rasterizer)->gras_cl_clip_cntl;
      uint8_t planes = ctx->rasterizer->clip_plane_enable;
      val |= CONDREG(
         ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL),
         A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);
      val |= CONDREG(
         ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL),
         A3XX_GRAS_CL_CLIP_CNTL_IJ_NON_PERSP_CENTER);
      val |= CONDREG(
         ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID),
         A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTROID);
      val |= CONDREG(
         ir3_find_sysval_regid(fp, SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID),
         A3XX_GRAS_CL_CLIP_CNTL_IJ_NON_PERSP_CENTROID);
      /* docs say enable at least one of IJ_PERSP_CENTER/CENTROID when
       * fragcoord is used */
      val |= CONDREG(ir3_find_sysval_regid(fp, SYSTEM_VALUE_FRAG_COORD),
                     A3XX_GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER);
      val |= COND(fp->writes_pos, A3XX_GRAS_CL_CLIP_CNTL_ZCLIP_DISABLE);
      val |=
         COND(fp->fragcoord_compmask != 0,
              A3XX_GRAS_CL_CLIP_CNTL_ZCOORD | A3XX_GRAS_CL_CLIP_CNTL_WCOORD);
      if (!emit->key.key.ucp_enables)
         val |= A3XX_GRAS_CL_CLIP_CNTL_NUM_USER_CLIP_PLANES(
            MIN2(util_bitcount(planes), 6));
      OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
      OUT_RING(ring, val);
   }

   if (dirty & (FD_DIRTY_RASTERIZER | FD_DIRTY_PROG | FD_DIRTY_UCP)) {
      uint32_t planes = ctx->rasterizer->clip_plane_enable;
      int count = 0;

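      /* if the shader variant lowers the user clip planes itself
       * (ucp_enables), don't also program the fixed-function planes:
       */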
      if (emit->key.key.ucp_enables)
         planes = 0;

      while (planes && count < 6) {
         int i = ffs(planes) - 1;

         planes &= ~(1U << i);
         fd_wfi(ctx->batch, ring);
         OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(count++), 4);
         OUT_RING(ring, fui(ctx->ucp.ucp[i][0]));
         OUT_RING(ring, fui(ctx->ucp.ucp[i][1]));
         OUT_RING(ring, fui(ctx->ucp.ucp[i][2]));
         OUT_RING(ring, fui(ctx->ucp.ucp[i][3]));
      }
   }

   /* NOTE: since primitive_restart is not actually part of any
    * state object, we need to make sure that we always emit
    * PRIM_VTX_CNTL.. either that or be more clever and detect
    * when it changes.
    */
   if (emit->info) {
      const struct pipe_draw_info *info = emit->info;
      uint32_t val = fd3_rasterizer_stateobj(ctx->rasterizer)->pc_prim_vtx_cntl;

      if (!emit->binning_pass) {
         uint32_t stride_in_vpc = align(fp->total_in, 4) / 4;
         if (stride_in_vpc > 0)
            stride_in_vpc = MAX2(stride_in_vpc, 2);
         val |= A3XX_PC_PRIM_VTX_CNTL_STRIDE_IN_VPC(stride_in_vpc);
      }

      if (info->index_size && info->primitive_restart) {
         val |= A3XX_PC_PRIM_VTX_CNTL_PRIMITIVE_RESTART;
      }

      val |= COND(vp->writes_psize, A3XX_PC_PRIM_VTX_CNTL_PSIZE);

      OUT_PKT0(ring, REG_A3XX_PC_PRIM_VTX_CNTL, 1);
      OUT_RING(ring, val);
   }

   if (dirty & (FD_DIRTY_SCISSOR | FD_DIRTY_RASTERIZER | FD_DIRTY_VIEWPORT)) {
      struct pipe_scissor_state *scissor = fd_context_get_scissor(ctx);
      int minx = scissor->minx;
      int miny = scissor->miny;
      int maxx = scissor->maxx;
      int maxy = scissor->maxy;

      /* Unfortunately there is no separate depth clip disable, only an all
       * or nothing deal. So when we disable clipping, we must handle the
       * viewport clip via scissors.
       */
      if (!ctx->rasterizer->depth_clip_near) {
         struct pipe_viewport_state *vp = &ctx->viewport;
         minx = MAX2(minx, (int)floorf(vp->translate[0] - fabsf(vp->scale[0])));
         miny = MAX2(miny, (int)floorf(vp->translate[1] - fabsf(vp->scale[1])));
         maxx = MIN2(maxx, (int)ceilf(vp->translate[0] + fabsf(vp->scale[0])));
         maxy = MIN2(maxy, (int)ceilf(vp->translate[1] + fabsf(vp->scale[1])));
      }

      OUT_PKT0(ring, REG_A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 2);
      OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_TL_X(minx) |
                        A3XX_GRAS_SC_WINDOW_SCISSOR_TL_Y(miny));
      OUT_RING(ring, A3XX_GRAS_SC_WINDOW_SCISSOR_BR_X(maxx - 1) |
                        A3XX_GRAS_SC_WINDOW_SCISSOR_BR_Y(maxy - 1));

      ctx->batch->max_scissor.minx = MIN2(ctx->batch->max_scissor.minx, minx);
      ctx->batch->max_scissor.miny = MIN2(ctx->batch->max_scissor.miny, miny);
      ctx->batch->max_scissor.maxx = MAX2(ctx->batch->max_scissor.maxx, maxx);
      ctx->batch->max_scissor.maxy = MAX2(ctx->batch->max_scissor.maxy, maxy);
   }

   if (dirty & FD_DIRTY_VIEWPORT) {
      fd_wfi(ctx->batch, ring);
      OUT_PKT0(ring, REG_A3XX_GRAS_CL_VPORT_XOFFSET, 6);
      OUT_RING(ring,
               A3XX_GRAS_CL_VPORT_XOFFSET(ctx->viewport.translate[0] - 0.5f));
      OUT_RING(ring, A3XX_GRAS_CL_VPORT_XSCALE(ctx->viewport.scale[0]));
      OUT_RING(ring,
               A3XX_GRAS_CL_VPORT_YOFFSET(ctx->viewport.translate[1] - 0.5f));
      OUT_RING(ring, A3XX_GRAS_CL_VPORT_YSCALE(ctx->viewport.scale[1]));
      OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZOFFSET(ctx->viewport.translate[2]));
      OUT_RING(ring, A3XX_GRAS_CL_VPORT_ZSCALE(ctx->viewport.scale[2]));
   }

   if (dirty &
       (FD_DIRTY_VIEWPORT | FD_DIRTY_RASTERIZER | FD_DIRTY_FRAMEBUFFER)) {
      float zmin, zmax;
      int depth = 24;
      if (ctx->batch->framebuffer.zsbuf) {
         depth = util_format_get_component_bits(
            pipe_surface_format(ctx->batch->framebuffer.zsbuf),
            UTIL_FORMAT_COLORSPACE_ZS, 0);
      }
      util_viewport_zmin_zmax(&ctx->viewport, ctx->rasterizer->clip_halfz,
                              &zmin, &zmax);

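      /* scale the clamp bounds into the unorm range of the depth buffer
       * (assumed 24 bit when there is no zsbuf):
       */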
      OUT_PKT0(ring, REG_A3XX_RB_Z_CLAMP_MIN, 2);
      if (depth == 32) {
         OUT_RING(ring, (uint32_t)(zmin * 0xffffffff));
         OUT_RING(ring, (uint32_t)(zmax * 0xffffffff));
      } else if (depth == 16) {
         OUT_RING(ring, (uint32_t)(zmin * 0xffff));
         OUT_RING(ring, (uint32_t)(zmax * 0xffff));
      } else {
         OUT_RING(ring, (uint32_t)(zmin * 0xffffff));
         OUT_RING(ring, (uint32_t)(zmax * 0xffffff));
      }
   }

   if (dirty & (FD_DIRTY_PROG | FD_DIRTY_FRAMEBUFFER | FD_DIRTY_BLEND_DUAL)) {
      struct pipe_framebuffer_state *pfb = &ctx->batch->framebuffer;
      int nr_cbufs = pfb->nr_cbufs;
      if (fd3_blend_stateobj(ctx->blend)->rb_render_control &
          A3XX_RB_RENDER_CONTROL_DUAL_COLOR_IN_ENABLE)
         nr_cbufs++;
      fd3_program_emit(ring, emit, nr_cbufs, pfb->cbufs);
   }

   /* TODO we should not need this or fd_wfi() before emit_constants():
    */
   OUT_PKT3(ring, CP_EVENT_WRITE, 1);
   OUT_RING(ring, HLSQ_FLUSH);

   if (!emit->skip_consts) {
      ir3_emit_vs_consts(vp, ring, ctx, emit->info, emit->indirect, emit->draw);
      if (!emit->binning_pass)
         ir3_emit_fs_consts(fp, ring, ctx);
   }

   if (dirty & (FD_DIRTY_BLEND | FD_DIRTY_FRAMEBUFFER)) {
      struct fd3_blend_stateobj *blend = fd3_blend_stateobj(ctx->blend);
      uint32_t i;

      for (i = 0; i < ARRAY_SIZE(blend->rb_mrt); i++) {
         enum pipe_format format =
            pipe_surface_format(ctx->batch->framebuffer.cbufs[i]);
         const struct util_format_description *desc =
            util_format_description(format);
         bool is_float = util_format_is_float(format);
         bool is_int = util_format_is_pure_integer(format);
         bool has_alpha = util_format_has_alpha(format);
         uint32_t control = blend->rb_mrt[i].control;

         if (is_int) {
            control &= (A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK |
                        A3XX_RB_MRT_CONTROL_DITHER_MODE__MASK);
            control |= A3XX_RB_MRT_CONTROL_ROP_CODE(ROP_COPY);
         }

         if (format == PIPE_FORMAT_NONE)
            control &= ~A3XX_RB_MRT_CONTROL_COMPONENT_ENABLE__MASK;

         if (!has_alpha) {
            control &= ~A3XX_RB_MRT_CONTROL_BLEND2;
         }

         if (format && util_format_get_component_bits(
                          format, UTIL_FORMAT_COLORSPACE_RGB, 0) < 8) {
            const struct pipe_rt_blend_state *rt;
            if (ctx->blend->independent_blend_enable)
               rt = &ctx->blend->rt[i];
            else
               rt = &ctx->blend->rt[0];

            if (!util_format_colormask_full(desc, rt->colormask))
               control |= A3XX_RB_MRT_CONTROL_READ_DEST_ENABLE;
         }

         OUT_PKT0(ring, REG_A3XX_RB_MRT_CONTROL(i), 1);
         OUT_RING(ring, control);

         OUT_PKT0(ring, REG_A3XX_RB_MRT_BLEND_CONTROL(i), 1);
         OUT_RING(ring,
                  blend->rb_mrt[i].blend_control |
                     COND(!is_float, A3XX_RB_MRT_BLEND_CONTROL_CLAMP_ENABLE));
      }
   }

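   /* RB_BLEND_* carry both a float and a rounded 8-bit unorm (UINT)
    * encoding of the blend color; presumably the hw picks one based on
    * the render target format:
    */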
   if (dirty & FD_DIRTY_BLEND_COLOR) {
      struct pipe_blend_color *bcolor = &ctx->blend_color;
      OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
      OUT_RING(ring,
               A3XX_RB_BLEND_RED_UINT(CLAMP(bcolor->color[0], 0.f, 1.f) * 0xff) |
                  A3XX_RB_BLEND_RED_FLOAT(bcolor->color[0]));
      OUT_RING(ring,
               A3XX_RB_BLEND_GREEN_UINT(CLAMP(bcolor->color[1], 0.f, 1.f) * 0xff) |
                  A3XX_RB_BLEND_GREEN_FLOAT(bcolor->color[1]));
      OUT_RING(ring,
               A3XX_RB_BLEND_BLUE_UINT(CLAMP(bcolor->color[2], 0.f, 1.f) * 0xff) |
                  A3XX_RB_BLEND_BLUE_FLOAT(bcolor->color[2]));
      OUT_RING(ring,
               A3XX_RB_BLEND_ALPHA_UINT(CLAMP(bcolor->color[3], 0.f, 1.f) * 0xff) |
                  A3XX_RB_BLEND_ALPHA_FLOAT(bcolor->color[3]));
   }

   if (dirty & FD_DIRTY_TEX)
      fd_wfi(ctx->batch, ring);

   if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & FD_DIRTY_SHADER_TEX)
      emit_textures(ctx, ring, SB_VERT_TEX, &ctx->tex[PIPE_SHADER_VERTEX]);

   if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & FD_DIRTY_SHADER_TEX)
      emit_textures(ctx, ring, SB_FRAG_TEX, &ctx->tex[PIPE_SHADER_FRAGMENT]);
}

/* emit setup at the beginning of a new cmdstream buffer (don't rely on
 * previous state, there could have been a context switch between ioctls):
 */
void
fd3_emit_restore(struct fd_batch *batch, struct fd_ringbuffer *ring)
{
   struct fd_context *ctx = batch->ctx;
   struct fd3_context *fd3_ctx = fd3_context(ctx);
   int i;

   if (ctx->screen->gpu_id == 320) {
      OUT_PKT3(ring, CP_REG_RMW, 3);
      OUT_RING(ring, REG_A3XX_RBBM_CLOCK_CTL);
      OUT_RING(ring, 0xfffcffff);
      OUT_RING(ring, 0x00000000);
   }

   fd_wfi(batch, ring);
   OUT_PKT3(ring, CP_INVALIDATE_STATE, 1);
   OUT_RING(ring, 0x00007fff);

   OUT_PKT0(ring, REG_A3XX_SP_VS_PVT_MEM_PARAM_REG, 3);
   OUT_RING(ring, 0x08000001);                    /* SP_VS_PVT_MEM_CTRL_REG */
   OUT_RELOC(ring, fd3_ctx->vs_pvt_mem, 0, 0, 0); /* SP_VS_PVT_MEM_ADDR_REG */
   OUT_RING(ring, 0x00000000);                    /* SP_VS_PVT_MEM_SIZE_REG */

   OUT_PKT0(ring, REG_A3XX_SP_FS_PVT_MEM_PARAM_REG, 3);
   OUT_RING(ring, 0x08000001);                    /* SP_FS_PVT_MEM_CTRL_REG */
   OUT_RELOC(ring, fd3_ctx->fs_pvt_mem, 0, 0, 0); /* SP_FS_PVT_MEM_ADDR_REG */
   OUT_RING(ring, 0x00000000);                    /* SP_FS_PVT_MEM_SIZE_REG */

   OUT_PKT0(ring, REG_A3XX_PC_VERTEX_REUSE_BLOCK_CNTL, 1);
   OUT_RING(ring, 0x0000000b); /* PC_VERTEX_REUSE_BLOCK_CNTL */

   OUT_PKT0(ring, REG_A3XX_GRAS_SC_CONTROL, 1);
   OUT_RING(ring, A3XX_GRAS_SC_CONTROL_RENDER_MODE(RB_RENDERING_PASS) |
                     A3XX_GRAS_SC_CONTROL_MSAA_SAMPLES(MSAA_ONE) |
                     A3XX_GRAS_SC_CONTROL_RASTER_MODE(0));

   OUT_PKT0(ring, REG_A3XX_RB_MSAA_CONTROL, 2);
   OUT_RING(ring, A3XX_RB_MSAA_CONTROL_DISABLE |
                     A3XX_RB_MSAA_CONTROL_SAMPLES(MSAA_ONE) |
                     A3XX_RB_MSAA_CONTROL_SAMPLE_MASK(0xffff));
   OUT_RING(ring, 0x00000000); /* RB_ALPHA_REF */

   OUT_PKT0(ring, REG_A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
   OUT_RING(ring, A3XX_GRAS_CL_GB_CLIP_ADJ_HORZ(0) |
                     A3XX_GRAS_CL_GB_CLIP_ADJ_VERT(0));

   OUT_PKT0(ring, REG_A3XX_GRAS_TSE_DEBUG_ECO, 1);
   OUT_RING(ring, 0x00000001); /* GRAS_TSE_DEBUG_ECO */

   OUT_PKT0(ring, REG_A3XX_TPL1_TP_VS_TEX_OFFSET, 1);
   OUT_RING(ring, A3XX_TPL1_TP_VS_TEX_OFFSET_SAMPLEROFFSET(VERT_TEX_OFF) |
                     A3XX_TPL1_TP_VS_TEX_OFFSET_MEMOBJOFFSET(VERT_TEX_OFF) |
                     A3XX_TPL1_TP_VS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ *
                                                             VERT_TEX_OFF));

   OUT_PKT0(ring, REG_A3XX_TPL1_TP_FS_TEX_OFFSET, 1);
   OUT_RING(ring, A3XX_TPL1_TP_FS_TEX_OFFSET_SAMPLEROFFSET(FRAG_TEX_OFF) |
                     A3XX_TPL1_TP_FS_TEX_OFFSET_MEMOBJOFFSET(FRAG_TEX_OFF) |
                     A3XX_TPL1_TP_FS_TEX_OFFSET_BASETABLEPTR(BASETABLE_SZ *
                                                             FRAG_TEX_OFF));

   OUT_PKT0(ring, REG_A3XX_VPC_VARY_CYLWRAP_ENABLE_0, 2);
   OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_0 */
   OUT_RING(ring, 0x00000000); /* VPC_VARY_CYLWRAP_ENABLE_1 */

   OUT_PKT0(ring, REG_A3XX_UNKNOWN_0E43, 1);
   OUT_RING(ring, 0x00000001); /* UNKNOWN_0E43 */

   OUT_PKT0(ring, REG_A3XX_UNKNOWN_0F03, 1);
   OUT_RING(ring, 0x00000001); /* UNKNOWN_0F03 */

   OUT_PKT0(ring, REG_A3XX_UNKNOWN_0EE0, 1);
   OUT_RING(ring, 0x00000003); /* UNKNOWN_0EE0 */

   OUT_PKT0(ring, REG_A3XX_UNKNOWN_0C3D, 1);
   OUT_RING(ring, 0x00000001); /* UNKNOWN_0C3D */

   OUT_PKT0(ring, REG_A3XX_HLSQ_PERFCOUNTER0_SELECT, 1);
   OUT_RING(ring, 0x00000000); /* HLSQ_PERFCOUNTER0_SELECT */

   OUT_PKT0(ring, REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 2);
   OUT_RING(ring, A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(0) |
                     A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(0));
   OUT_RING(ring, A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(0) |
                     A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(0));

   fd3_emit_cache_flush(batch, ring);

   OUT_PKT0(ring, REG_A3XX_GRAS_CL_CLIP_CNTL, 1);
   OUT_RING(ring, 0x00000000); /* GRAS_CL_CLIP_CNTL */

   OUT_PKT0(ring, REG_A3XX_GRAS_SU_POINT_MINMAX, 2);
   OUT_RING(ring, 0xffc00010); /* GRAS_SU_POINT_MINMAX */
   OUT_RING(ring, 0x00000008); /* GRAS_SU_POINT_SIZE */

   OUT_PKT0(ring, REG_A3XX_PC_RESTART_INDEX, 1);
   OUT_RING(ring, 0xffffffff); /* PC_RESTART_INDEX */

   OUT_PKT0(ring, REG_A3XX_RB_WINDOW_OFFSET, 1);
   OUT_RING(ring, A3XX_RB_WINDOW_OFFSET_X(0) | A3XX_RB_WINDOW_OFFSET_Y(0));

   OUT_PKT0(ring, REG_A3XX_RB_BLEND_RED, 4);
   OUT_RING(ring, A3XX_RB_BLEND_RED_UINT(0) | A3XX_RB_BLEND_RED_FLOAT(0.0f));
   OUT_RING(ring, A3XX_RB_BLEND_GREEN_UINT(0) | A3XX_RB_BLEND_GREEN_FLOAT(0.0f));
   OUT_RING(ring, A3XX_RB_BLEND_BLUE_UINT(0) | A3XX_RB_BLEND_BLUE_FLOAT(0.0f));
   OUT_RING(ring,
            A3XX_RB_BLEND_ALPHA_UINT(0xff) | A3XX_RB_BLEND_ALPHA_FLOAT(1.0f));

   for (i = 0; i < 6; i++) {
      OUT_PKT0(ring, REG_A3XX_GRAS_CL_USER_PLANE(i), 4);
      OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].X */
      OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Y */
      OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].Z */
      OUT_RING(ring, 0x00000000); /* GRAS_CL_USER_PLANE[i].W */
   }

   OUT_PKT0(ring, REG_A3XX_PC_VSTREAM_CONTROL, 1);
   OUT_RING(ring, 0x00000000);

   fd_event_write(batch, ring, CACHE_FLUSH);

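   /* dummy draw, seems to be needed on patch-revision-0 parts; presumably
    * a hw workaround:
    */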
   if (is_a3xx_p0(ctx->screen)) {
      OUT_PKT3(ring, CP_DRAW_INDX, 3);
      OUT_RING(ring, 0x00000000);
      OUT_RING(ring, DRAW(1, DI_SRC_SEL_AUTO_INDEX, INDEX_SIZE_IGN,
                          IGNORE_VISIBILITY, 0));
      OUT_RING(ring, 0); /* NumIndices */
   }

   OUT_PKT3(ring, CP_NOP, 4);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);
   OUT_RING(ring, 0x00000000);

   fd_wfi(batch, ring);

   fd_hw_query_enable(batch, ring);
}

void
fd3_emit_init_screen(struct pipe_screen *pscreen)
{
   struct fd_screen *screen = fd_screen(pscreen);
   screen->emit_ib = fd3_emit_ib;
}

void
fd3_emit_init(struct pipe_context *pctx)
{
}