1/*
2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#include "pipe/p_state.h"
28#include "util/u_inlines.h"
29#include "util/u_memory.h"
30#include "util/u_string.h"
31
32#include "freedreno_draw.h"
33#include "freedreno_resource.h"
34#include "freedreno_state.h"
35
36#include "ir2/instr-a2xx.h"
37#include "fd2_context.h"
38#include "fd2_draw.h"
39#include "fd2_emit.h"
40#include "fd2_gmem.h"
41#include "fd2_program.h"
42#include "fd2_util.h"
43#include "fd2_zsa.h"
44
45static uint32_t
46fmt2swap(enum pipe_format format)
47{
48   switch (format) {
49   case PIPE_FORMAT_B8G8R8A8_UNORM:
50   case PIPE_FORMAT_B8G8R8X8_UNORM:
51   case PIPE_FORMAT_B5G6R5_UNORM:
52   case PIPE_FORMAT_B5G5R5A1_UNORM:
53   case PIPE_FORMAT_B5G5R5X1_UNORM:
54   case PIPE_FORMAT_B4G4R4A4_UNORM:
55   case PIPE_FORMAT_B4G4R4X4_UNORM:
56   case PIPE_FORMAT_B2G3R3_UNORM:
57      return 1;
58   default:
59      return 0;
60   }
61}
62
63static bool
64use_hw_binning(struct fd_batch *batch)
65{
66   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
67
68   /* we hardcoded a limit of 8 "pipes", we can increase this limit
69    * at the cost of a slightly larger command stream
70    * however very few cases will need more than 8
71    * gmem->num_vsc_pipes == 0 means empty batch (TODO: does it still happen?)
72    */
73   if (gmem->num_vsc_pipes > 8 || !gmem->num_vsc_pipes)
74      return false;
75
76   /* only a20x hw binning is implement
77    * a22x is more like a3xx, but perhaps the a20x works? (TODO)
78    */
79   if (!is_a20x(batch->ctx->screen))
80      return false;
81
82   return fd_binning_enabled && ((gmem->nbins_x * gmem->nbins_y) > 2);
83}
84
85/* transfer from gmem to system memory (ie. normal RAM) */
86
87static void
88emit_gmem2mem_surf(struct fd_batch *batch, uint32_t base,
89                   struct pipe_surface *psurf)
90{
91   struct fd_ringbuffer *ring = batch->tile_fini;
92   struct fd_resource *rsc = fd_resource(psurf->texture);
93   uint32_t offset =
94      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
95   enum pipe_format format = fd_gmem_restore_format(psurf->format);
96   uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
97
98   assert((pitch & 31) == 0);
99   assert((offset & 0xfff) == 0);
100
101   if (!rsc->valid)
102      return;
103
104   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
105   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
106   OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
107                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
108
109   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
110   OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_CONTROL));
111   OUT_RING(ring, 0x00000000);             /* RB_COPY_CONTROL */
112   OUT_RELOC(ring, rsc->bo, offset, 0, 0); /* RB_COPY_DEST_BASE */
113   OUT_RING(ring, pitch >> 5);             /* RB_COPY_DEST_PITCH */
114   OUT_RING(ring,                          /* RB_COPY_DEST_INFO */
115            A2XX_RB_COPY_DEST_INFO_FORMAT(fd2_pipe2color(format)) |
116               COND(!rsc->layout.tile_mode, A2XX_RB_COPY_DEST_INFO_LINEAR) |
117               A2XX_RB_COPY_DEST_INFO_WRITE_RED |
118               A2XX_RB_COPY_DEST_INFO_WRITE_GREEN |
119               A2XX_RB_COPY_DEST_INFO_WRITE_BLUE |
120               A2XX_RB_COPY_DEST_INFO_WRITE_ALPHA);
121
122   if (!is_a20x(batch->ctx->screen)) {
123      OUT_WFI(ring);
124
125      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
126      OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
127      OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
128      OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
129   }
130
131   fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
132           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
133}
134
135static void
136prepare_tile_fini_ib(struct fd_batch *batch) assert_dt
137{
138   struct fd_context *ctx = batch->ctx;
139   struct fd2_context *fd2_ctx = fd2_context(ctx);
140   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
141   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
142   struct fd_ringbuffer *ring;
143
144   batch->tile_fini =
145      fd_submit_new_ringbuffer(batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
146   ring = batch->tile_fini;
147
148   fd2_emit_vertex_bufs(ring, 0x9c,
149                        (struct fd2_vertex_buf[]){
150                           {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
151                        },
152                        1);
153
154   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
155   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
156   OUT_RING(ring, 0x00000000); /* PA_SC_WINDOW_OFFSET */
157
158   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
159   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
160   OUT_RING(ring, 0);
161
162   if (!is_a20x(ctx->screen)) {
163      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
164      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
165      OUT_RING(ring, 0x0000028f);
166   }
167
168   fd2_program_emit(ctx, ring, &ctx->solid_prog);
169
170   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
171   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
172   OUT_RING(ring, 0x0000ffff);
173
174   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
175   OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
176   OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
177
178   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
179   OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
180   OUT_RING(
181      ring,
182      A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST | /* PA_SU_SC_MODE_CNTL */
183         A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
184         A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
185
186   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
187   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
188   OUT_RING(ring, xy2d(0, 0));                    /* PA_SC_WINDOW_SCISSOR_TL */
189   OUT_RING(ring, xy2d(pfb->width, pfb->height)); /* PA_SC_WINDOW_SCISSOR_BR */
190
191   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
192   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
193   OUT_RING(ring, 0x00000000);
194
195   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
196   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
197   OUT_RING(ring, fui((float)gmem->bin_w / 2.0f)); /* XSCALE */
198   OUT_RING(ring, fui((float)gmem->bin_w / 2.0f)); /* XOFFSET */
199   OUT_RING(ring, fui((float)gmem->bin_h / 2.0f)); /* YSCALE */
200   OUT_RING(ring, fui((float)gmem->bin_h / 2.0f)); /* YOFFSET */
201
202   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
203   OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
204   OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(EDRAM_COPY));
205
206   if (batch->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
207      emit_gmem2mem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
208
209   if (batch->resolve & FD_BUFFER_COLOR)
210      emit_gmem2mem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
211
212   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
213   OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
214   OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
215
216   if (!is_a20x(ctx->screen)) {
217      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
218      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
219      OUT_RING(ring, 0x0000003b);
220   }
221}
222
223static void
224fd2_emit_tile_gmem2mem(struct fd_batch *batch, const struct fd_tile *tile)
225{
226   fd2_emit_ib(batch->gmem, batch->tile_fini);
227}
228
229/* transfer from system memory to gmem */
230
231static void
232emit_mem2gmem_surf(struct fd_batch *batch, uint32_t base,
233                   struct pipe_surface *psurf)
234{
235   struct fd_ringbuffer *ring = batch->gmem;
236   struct fd_resource *rsc = fd_resource(psurf->texture);
237   uint32_t offset =
238      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
239   enum pipe_format format = fd_gmem_restore_format(psurf->format);
240
241   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
242   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
243   OUT_RING(ring, A2XX_RB_COLOR_INFO_BASE(base) |
244                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
245
246   /* emit fb as a texture: */
247   OUT_PKT3(ring, CP_SET_CONSTANT, 7);
248   OUT_RING(ring, 0x00010000);
249   OUT_RING(ring, A2XX_SQ_TEX_0_CLAMP_X(SQ_TEX_WRAP) |
250                     A2XX_SQ_TEX_0_CLAMP_Y(SQ_TEX_WRAP) |
251                     A2XX_SQ_TEX_0_CLAMP_Z(SQ_TEX_WRAP) |
252                     A2XX_SQ_TEX_0_PITCH(
253                        fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level)));
254   OUT_RELOC(ring, rsc->bo, offset,
255             A2XX_SQ_TEX_1_FORMAT(fd2_pipe2surface(format).format) |
256                A2XX_SQ_TEX_1_CLAMP_POLICY(SQ_TEX_CLAMP_POLICY_OGL),
257             0);
258   OUT_RING(ring, A2XX_SQ_TEX_2_WIDTH(psurf->width - 1) |
259                     A2XX_SQ_TEX_2_HEIGHT(psurf->height - 1));
260   OUT_RING(ring, A2XX_SQ_TEX_3_MIP_FILTER(SQ_TEX_FILTER_BASEMAP) |
261                     A2XX_SQ_TEX_3_SWIZ_X(0) | A2XX_SQ_TEX_3_SWIZ_Y(1) |
262                     A2XX_SQ_TEX_3_SWIZ_Z(2) | A2XX_SQ_TEX_3_SWIZ_W(3) |
263                     A2XX_SQ_TEX_3_XY_MAG_FILTER(SQ_TEX_FILTER_POINT) |
264                     A2XX_SQ_TEX_3_XY_MIN_FILTER(SQ_TEX_FILTER_POINT));
265   OUT_RING(ring, 0x00000000);
266   OUT_RING(ring, A2XX_SQ_TEX_5_DIMENSION(SQ_TEX_DIMENSION_2D));
267
268   if (!is_a20x(batch->ctx->screen)) {
269      OUT_PKT3(ring, CP_SET_CONSTANT, 3);
270      OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
271      OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
272      OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
273   }
274
275   fd_draw(batch, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
276           DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
277}
278
279static void
280fd2_emit_tile_mem2gmem(struct fd_batch *batch,
281                       const struct fd_tile *tile) assert_dt
282{
283   struct fd_context *ctx = batch->ctx;
284   struct fd2_context *fd2_ctx = fd2_context(ctx);
285   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
286   struct fd_ringbuffer *ring = batch->gmem;
287   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
288   unsigned bin_w = tile->bin_w;
289   unsigned bin_h = tile->bin_h;
290   float x0, y0, x1, y1;
291
292   fd2_emit_vertex_bufs(
293      ring, 0x9c,
294      (struct fd2_vertex_buf[]){
295         {.prsc = fd2_ctx->solid_vertexbuf, .size = 36},
296         {.prsc = fd2_ctx->solid_vertexbuf, .size = 24, .offset = 36},
297      },
298      2);
299
300   /* write texture coordinates to vertexbuf: */
301   x0 = ((float)tile->xoff) / ((float)pfb->width);
302   x1 = ((float)tile->xoff + bin_w) / ((float)pfb->width);
303   y0 = ((float)tile->yoff) / ((float)pfb->height);
304   y1 = ((float)tile->yoff + bin_h) / ((float)pfb->height);
305   OUT_PKT3(ring, CP_MEM_WRITE, 7);
306   OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 36, 0, 0);
307   OUT_RING(ring, fui(x0));
308   OUT_RING(ring, fui(y0));
309   OUT_RING(ring, fui(x1));
310   OUT_RING(ring, fui(y0));
311   OUT_RING(ring, fui(x0));
312   OUT_RING(ring, fui(y1));
313
314   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
315   OUT_RING(ring, CP_REG(REG_A2XX_VGT_INDX_OFFSET));
316   OUT_RING(ring, 0);
317
318   fd2_program_emit(ctx, ring, &ctx->blit_prog[0]);
319
320   OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
321   OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
322
323   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
324   OUT_RING(ring, CP_REG(REG_A2XX_RB_DEPTHCONTROL));
325   OUT_RING(ring, A2XX_RB_DEPTHCONTROL_EARLY_Z_ENABLE);
326
327   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
328   OUT_RING(ring, CP_REG(REG_A2XX_PA_SU_SC_MODE_CNTL));
329   OUT_RING(ring, A2XX_PA_SU_SC_MODE_CNTL_PROVOKING_VTX_LAST |
330                     A2XX_PA_SU_SC_MODE_CNTL_FRONT_PTYPE(PC_DRAW_TRIANGLES) |
331                     A2XX_PA_SU_SC_MODE_CNTL_BACK_PTYPE(PC_DRAW_TRIANGLES));
332
333   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
334   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_AA_MASK));
335   OUT_RING(ring, 0x0000ffff);
336
337   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
338   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLORCONTROL));
339   OUT_RING(ring, A2XX_RB_COLORCONTROL_ALPHA_FUNC(FUNC_ALWAYS) |
340                     A2XX_RB_COLORCONTROL_BLEND_DISABLE |
341                     A2XX_RB_COLORCONTROL_ROP_CODE(12) |
342                     A2XX_RB_COLORCONTROL_DITHER_MODE(DITHER_DISABLE) |
343                     A2XX_RB_COLORCONTROL_DITHER_TYPE(DITHER_PIXEL));
344
345   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
346   OUT_RING(ring, CP_REG(REG_A2XX_RB_BLEND_CONTROL));
347   OUT_RING(ring, A2XX_RB_BLEND_CONTROL_COLOR_SRCBLEND(FACTOR_ONE) |
348                     A2XX_RB_BLEND_CONTROL_COLOR_COMB_FCN(BLEND2_DST_PLUS_SRC) |
349                     A2XX_RB_BLEND_CONTROL_COLOR_DESTBLEND(FACTOR_ZERO) |
350                     A2XX_RB_BLEND_CONTROL_ALPHA_SRCBLEND(FACTOR_ONE) |
351                     A2XX_RB_BLEND_CONTROL_ALPHA_COMB_FCN(BLEND2_DST_PLUS_SRC) |
352                     A2XX_RB_BLEND_CONTROL_ALPHA_DESTBLEND(FACTOR_ZERO));
353
354   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
355   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_SCISSOR_TL));
356   OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_DISABLE |
357                     xy2d(0, 0));      /* PA_SC_WINDOW_SCISSOR_TL */
358   OUT_RING(ring, xy2d(bin_w, bin_h)); /* PA_SC_WINDOW_SCISSOR_BR */
359
360   OUT_PKT3(ring, CP_SET_CONSTANT, 5);
361   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VPORT_XSCALE));
362   OUT_RING(ring, fui((float)bin_w / 2.0f));  /* PA_CL_VPORT_XSCALE */
363   OUT_RING(ring, fui((float)bin_w / 2.0f));  /* PA_CL_VPORT_XOFFSET */
364   OUT_RING(ring, fui(-(float)bin_h / 2.0f)); /* PA_CL_VPORT_YSCALE */
365   OUT_RING(ring, fui((float)bin_h / 2.0f));  /* PA_CL_VPORT_YOFFSET */
366
367   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
368   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
369   OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_XY_FMT |
370                     A2XX_PA_CL_VTE_CNTL_VTX_Z_FMT | // XXX check this???
371                     A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
372                     A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
373                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
374                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA);
375
376   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
377   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_CLIP_CNTL));
378   OUT_RING(ring, 0x00000000);
379
380   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_DEPTH | FD_BUFFER_STENCIL))
381      emit_mem2gmem_surf(batch, gmem->zsbuf_base[0], pfb->zsbuf);
382
383   if (fd_gmem_needs_restore(batch, tile, FD_BUFFER_COLOR))
384      emit_mem2gmem_surf(batch, gmem->cbuf_base[0], pfb->cbufs[0]);
385
386   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
387   OUT_RING(ring, CP_REG(REG_A2XX_PA_CL_VTE_CNTL));
388   OUT_RING(ring, A2XX_PA_CL_VTE_CNTL_VTX_W0_FMT |
389                     A2XX_PA_CL_VTE_CNTL_VPORT_X_SCALE_ENA |
390                     A2XX_PA_CL_VTE_CNTL_VPORT_X_OFFSET_ENA |
391                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_SCALE_ENA |
392                     A2XX_PA_CL_VTE_CNTL_VPORT_Y_OFFSET_ENA |
393                     A2XX_PA_CL_VTE_CNTL_VPORT_Z_SCALE_ENA |
394                     A2XX_PA_CL_VTE_CNTL_VPORT_Z_OFFSET_ENA);
395
396   /* TODO blob driver seems to toss in a CACHE_FLUSH after each DRAW_INDX.. */
397}
398
399static void
400patch_draws(struct fd_batch *batch, enum pc_di_vis_cull_mode vismode)
401{
402   unsigned i;
403
404   if (!is_a20x(batch->ctx->screen)) {
405      /* identical to a3xx */
406      for (i = 0; i < fd_patch_num_elements(&batch->draw_patches); i++) {
407         struct fd_cs_patch *patch = fd_patch_element(&batch->draw_patches, i);
408         *patch->cs = patch->val | DRAW(0, 0, 0, vismode, 0);
409      }
410      util_dynarray_clear(&batch->draw_patches);
411      return;
412   }
413
414   if (vismode == USE_VISIBILITY)
415      return;
416
417   for (i = 0; i < batch->draw_patches.size / sizeof(uint32_t *); i++) {
418      uint32_t *ptr =
419         *util_dynarray_element(&batch->draw_patches, uint32_t *, i);
420      unsigned cnt = ptr[0] >> 16 & 0xfff; /* 5 with idx buffer, 3 without */
421
422      /* convert CP_DRAW_INDX_BIN to a CP_DRAW_INDX
423       * replace first two DWORDS with NOP and move the rest down
424       * (we don't want to have to move the idx buffer reloc)
425       */
426      ptr[0] = CP_TYPE3_PKT | (CP_NOP << 8);
427      ptr[1] = 0x00000000;
428
429      ptr[4] = ptr[2] & ~(1 << 14 | 1 << 15); /* remove cull_enable bits */
430      ptr[2] = CP_TYPE3_PKT | ((cnt - 2) << 16) | (CP_DRAW_INDX << 8);
431      ptr[3] = 0x00000000;
432   }
433}
434
435static void
436fd2_emit_sysmem_prep(struct fd_batch *batch)
437{
438   struct fd_context *ctx = batch->ctx;
439   struct fd_ringbuffer *ring = batch->gmem;
440   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
441   struct pipe_surface *psurf = pfb->cbufs[0];
442
443   if (!psurf)
444      return;
445
446   struct fd_resource *rsc = fd_resource(psurf->texture);
447   uint32_t offset =
448      fd_resource_offset(rsc, psurf->u.tex.level, psurf->u.tex.first_layer);
449   uint32_t pitch = fdl2_pitch_pixels(&rsc->layout, psurf->u.tex.level);
450
451   assert((pitch & 31) == 0);
452   assert((offset & 0xfff) == 0);
453
454   fd2_emit_restore(ctx, ring);
455
456   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
457   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
458   OUT_RING(ring, A2XX_RB_SURFACE_INFO_SURFACE_PITCH(pitch));
459
460   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
461   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
462   OUT_RELOC(ring, rsc->bo, offset,
463             COND(!rsc->layout.tile_mode, A2XX_RB_COLOR_INFO_LINEAR) |
464                A2XX_RB_COLOR_INFO_SWAP(fmt2swap(psurf->format)) |
465                A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(psurf->format)),
466             0);
467
468   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
469   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
470   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_WINDOW_OFFSET_DISABLE);
471   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(pfb->width) |
472                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(pfb->height));
473
474   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
475   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
476   OUT_RING(ring,
477            A2XX_PA_SC_WINDOW_OFFSET_X(0) | A2XX_PA_SC_WINDOW_OFFSET_Y(0));
478
479   patch_draws(batch, IGNORE_VISIBILITY);
480   util_dynarray_clear(&batch->draw_patches);
481   util_dynarray_clear(&batch->shader_patches);
482}
483
484/* before first tile */
485static void
486fd2_emit_tile_init(struct fd_batch *batch) assert_dt
487{
488   struct fd_context *ctx = batch->ctx;
489   struct fd_ringbuffer *ring = batch->gmem;
490   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
491   const struct fd_gmem_stateobj *gmem = batch->gmem_state;
492   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
493   uint32_t reg;
494
495   fd2_emit_restore(ctx, ring);
496
497   prepare_tile_fini_ib(batch);
498
499   OUT_PKT3(ring, CP_SET_CONSTANT, 4);
500   OUT_RING(ring, CP_REG(REG_A2XX_RB_SURFACE_INFO));
501   OUT_RING(ring, gmem->bin_w); /* RB_SURFACE_INFO */
502   OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
503                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
504   reg = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
505   if (pfb->zsbuf)
506      reg |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(fd_pipe2depth(pfb->zsbuf->format));
507   OUT_RING(ring, reg); /* RB_DEPTH_INFO */
508
509   /* fast clear patches */
510   int depth_size = -1;
511   int color_size = -1;
512
513   if (pfb->cbufs[0])
514      color_size = util_format_get_blocksizebits(format) == 32 ? 4 : 2;
515
516   if (pfb->zsbuf)
517      depth_size = fd_pipe2depth(pfb->zsbuf->format) == 1 ? 4 : 2;
518
519   for (int i = 0; i < fd_patch_num_elements(&batch->gmem_patches); i++) {
520      struct fd_cs_patch *patch = fd_patch_element(&batch->gmem_patches, i);
521      uint32_t color_base = 0, depth_base = gmem->zsbuf_base[0];
522      uint32_t size, lines;
523
524      /* note: 1 "line" is 512 bytes in both color/depth areas (1K total) */
525      switch (patch->val) {
526      case GMEM_PATCH_FASTCLEAR_COLOR:
527         size = align(gmem->bin_w * gmem->bin_h * color_size, 0x8000);
528         lines = size / 1024;
529         depth_base = size / 2;
530         break;
531      case GMEM_PATCH_FASTCLEAR_DEPTH:
532         size = align(gmem->bin_w * gmem->bin_h * depth_size, 0x8000);
533         lines = size / 1024;
534         color_base = depth_base;
535         depth_base = depth_base + size / 2;
536         break;
537      case GMEM_PATCH_FASTCLEAR_COLOR_DEPTH:
538         lines =
539            align(gmem->bin_w * gmem->bin_h * color_size * 2, 0x8000) / 1024;
540         break;
541      case GMEM_PATCH_RESTORE_INFO:
542         patch->cs[0] = gmem->bin_w;
543         patch->cs[1] = A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
544                        A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format));
545         patch->cs[2] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(gmem->zsbuf_base[0]);
546         if (pfb->zsbuf)
547            patch->cs[2] |= A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(
548               fd_pipe2depth(pfb->zsbuf->format));
549         continue;
550      default:
551         continue;
552      }
553
554      patch->cs[0] = A2XX_PA_SC_SCREEN_SCISSOR_BR_X(32) |
555                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(lines);
556      patch->cs[4] = A2XX_RB_COLOR_INFO_BASE(color_base) |
557                     A2XX_RB_COLOR_INFO_FORMAT(COLORX_8_8_8_8);
558      patch->cs[5] = A2XX_RB_DEPTH_INFO_DEPTH_BASE(depth_base) |
559                     A2XX_RB_DEPTH_INFO_DEPTH_FORMAT(1);
560   }
561   util_dynarray_clear(&batch->gmem_patches);
562
563   /* set to zero, for some reason hardware doesn't like certain values */
564   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
565   OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
566   OUT_RING(ring, 0);
567
568   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
569   OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
570   OUT_RING(ring, 0);
571
572   if (use_hw_binning(batch)) {
573      /* patch out unneeded memory exports by changing EXEC CF to EXEC_END
574       *
575       * in the shader compiler, we guarantee that the shader ends with
576       * a specific pattern of ALLOC/EXEC CF pairs for the hw binning exports
577       *
578       * the since patches point only to dwords and CFs are 1.5 dwords
579       * the patch is aligned and might point to a ALLOC CF
580       */
581      for (int i = 0; i < batch->shader_patches.size / sizeof(void *); i++) {
582         instr_cf_t *cf =
583            *util_dynarray_element(&batch->shader_patches, instr_cf_t *, i);
584         if (cf->opc == ALLOC)
585            cf++;
586         assert(cf->opc == EXEC);
587         assert(cf[ctx->screen->info->num_vsc_pipes * 2 - 2].opc == EXEC_END);
588         cf[2 * (gmem->num_vsc_pipes - 1)].opc = EXEC_END;
589      }
590
591      patch_draws(batch, USE_VISIBILITY);
592
593      /* initialize shader constants for the binning memexport */
594      OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 4);
595      OUT_RING(ring, 0x0000000C);
596
597      for (int i = 0; i < gmem->num_vsc_pipes; i++) {
598         /* allocate in 64k increments to avoid reallocs */
599         uint32_t bo_size = align(batch->num_vertices, 0x10000);
600         if (!ctx->vsc_pipe_bo[i] ||
601             fd_bo_size(ctx->vsc_pipe_bo[i]) < bo_size) {
602            if (ctx->vsc_pipe_bo[i])
603               fd_bo_del(ctx->vsc_pipe_bo[i]);
604            ctx->vsc_pipe_bo[i] =
605               fd_bo_new(ctx->dev, bo_size, 0, "vsc_pipe[%u]", i);
606            assert(ctx->vsc_pipe_bo[i]);
607         }
608
609         /* memory export address (export32):
610          * .x: (base_address >> 2) | 0x40000000 (?)
611          * .y: index (float) - set by shader
612          * .z: 0x4B00D000 (?)
613          * .w: 0x4B000000 (?) | max_index (?)
614          */
615         OUT_RELOC(ring, ctx->vsc_pipe_bo[i], 0, 0x40000000, -2);
616         OUT_RING(ring, 0x00000000);
617         OUT_RING(ring, 0x4B00D000);
618         OUT_RING(ring, 0x4B000000 | bo_size);
619      }
620
621      OUT_PKT3(ring, CP_SET_CONSTANT, 1 + gmem->num_vsc_pipes * 8);
622      OUT_RING(ring, 0x0000018C);
623
624      for (int i = 0; i < gmem->num_vsc_pipes; i++) {
625         const struct fd_vsc_pipe *pipe = &gmem->vsc_pipe[i];
626         float off_x, off_y, mul_x, mul_y;
627
628         /* const to tranform from [-1,1] to bin coordinates for this pipe
629          * for x/y, [0,256/2040] = 0, [256/2040,512/2040] = 1, etc
630          * 8 possible values on x/y axis,
631          * to clip at binning stage: only use center 6x6
632          * TODO: set the z parameters too so that hw binning
633          * can clip primitives in Z too
634          */
635
636         mul_x = 1.0f / (float)(gmem->bin_w * 8);
637         mul_y = 1.0f / (float)(gmem->bin_h * 8);
638         off_x = -pipe->x * (1.0f / 8.0f) + 0.125f - mul_x * gmem->minx;
639         off_y = -pipe->y * (1.0f / 8.0f) + 0.125f - mul_y * gmem->miny;
640
641         OUT_RING(ring, fui(off_x * (256.0f / 255.0f)));
642         OUT_RING(ring, fui(off_y * (256.0f / 255.0f)));
643         OUT_RING(ring, 0x3f000000);
644         OUT_RING(ring, fui(0.0f));
645
646         OUT_RING(ring, fui(mul_x * (256.0f / 255.0f)));
647         OUT_RING(ring, fui(mul_y * (256.0f / 255.0f)));
648         OUT_RING(ring, fui(0.0f));
649         OUT_RING(ring, fui(0.0f));
650      }
651
652      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
653      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
654      OUT_RING(ring, 0);
655
656      fd2_emit_ib(ring, batch->binning);
657
658      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
659      OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
660      OUT_RING(ring, 0x00000002);
661   } else {
662      patch_draws(batch, IGNORE_VISIBILITY);
663   }
664
665   util_dynarray_clear(&batch->draw_patches);
666   util_dynarray_clear(&batch->shader_patches);
667}
668
669/* before mem2gmem */
670static void
671fd2_emit_tile_prep(struct fd_batch *batch, const struct fd_tile *tile)
672{
673   struct fd_ringbuffer *ring = batch->gmem;
674   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
675   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
676
677   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
678   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
679   OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(1) | /* RB_COLOR_INFO */
680                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
681
682   /* setup screen scissor for current tile (same for mem2gmem): */
683   OUT_PKT3(ring, CP_SET_CONSTANT, 3);
684   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_SCREEN_SCISSOR_TL));
685   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_TL_X(0) |
686                     A2XX_PA_SC_SCREEN_SCISSOR_TL_Y(0));
687   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
688                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
689}
690
691/* before IB to rendering cmds: */
692static void
693fd2_emit_tile_renderprep(struct fd_batch *batch,
694                         const struct fd_tile *tile) assert_dt
695{
696   struct fd_context *ctx = batch->ctx;
697   struct fd2_context *fd2_ctx = fd2_context(ctx);
698   struct fd_ringbuffer *ring = batch->gmem;
699   struct pipe_framebuffer_state *pfb = &batch->framebuffer;
700   enum pipe_format format = pipe_surface_format(pfb->cbufs[0]);
701
702   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
703   OUT_RING(ring, CP_REG(REG_A2XX_RB_COLOR_INFO));
704   OUT_RING(ring, A2XX_RB_COLOR_INFO_SWAP(fmt2swap(format)) |
705                     A2XX_RB_COLOR_INFO_FORMAT(fd2_pipe2color(format)));
706
707   /* setup window scissor and offset for current tile (different
708    * from mem2gmem):
709    */
710   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
711   OUT_RING(ring, CP_REG(REG_A2XX_PA_SC_WINDOW_OFFSET));
712   OUT_RING(ring, A2XX_PA_SC_WINDOW_OFFSET_X(-tile->xoff) |
713                     A2XX_PA_SC_WINDOW_OFFSET_Y(-tile->yoff));
714
715   /* write SCISSOR_BR to memory so fast clear path can restore from it */
716   OUT_PKT3(ring, CP_MEM_WRITE, 2);
717   OUT_RELOC(ring, fd_resource(fd2_ctx->solid_vertexbuf)->bo, 60, 0, 0);
718   OUT_RING(ring, A2XX_PA_SC_SCREEN_SCISSOR_BR_X(tile->bin_w) |
719                     A2XX_PA_SC_SCREEN_SCISSOR_BR_Y(tile->bin_h));
720
721   /* set the copy offset for gmem2mem */
722   OUT_PKT3(ring, CP_SET_CONSTANT, 2);
723   OUT_RING(ring, CP_REG(REG_A2XX_RB_COPY_DEST_OFFSET));
724   OUT_RING(ring, A2XX_RB_COPY_DEST_OFFSET_X(tile->xoff) |
725                     A2XX_RB_COPY_DEST_OFFSET_Y(tile->yoff));
726
727   /* tile offset for gl_FragCoord on a20x (C64 in fragment shader) */
728   if (is_a20x(ctx->screen)) {
729      OUT_PKT3(ring, CP_SET_CONSTANT, 5);
730      OUT_RING(ring, 0x00000580);
731      OUT_RING(ring, fui(tile->xoff));
732      OUT_RING(ring, fui(tile->yoff));
733      OUT_RING(ring, fui(0.0f));
734      OUT_RING(ring, fui(0.0f));
735   }
736
737   if (use_hw_binning(batch)) {
738      struct fd_bo *pipe_bo = ctx->vsc_pipe_bo[tile->p];
739
740      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
741      OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MIN));
742      OUT_RING(ring, tile->n);
743
744      OUT_PKT3(ring, CP_SET_CONSTANT, 2);
745      OUT_RING(ring, CP_REG(REG_A2XX_VGT_CURRENT_BIN_ID_MAX));
746      OUT_RING(ring, tile->n);
747
748      /* TODO only emit this when tile->p changes */
749      OUT_PKT3(ring, CP_SET_DRAW_INIT_FLAGS, 1);
750      OUT_RELOC(ring, pipe_bo, 0, 0, 0);
751   }
752}
753
754void
755fd2_gmem_init(struct pipe_context *pctx) disable_thread_safety_analysis
756{
757   struct fd_context *ctx = fd_context(pctx);
758
759   ctx->emit_sysmem_prep = fd2_emit_sysmem_prep;
760   ctx->emit_tile_init = fd2_emit_tile_init;
761   ctx->emit_tile_prep = fd2_emit_tile_prep;
762   ctx->emit_tile_mem2gmem = fd2_emit_tile_mem2gmem;
763   ctx->emit_tile_renderprep = fd2_emit_tile_renderprep;
764   ctx->emit_tile_gmem2mem = fd2_emit_tile_gmem2mem;
765}
766